{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "# Folder that holds the input CSV files. Edit this single constant to run the\n",
    "# notebook on another machine instead of patching every path literal.\n",
    "DATA_DIR = Path(r'C:\\Users\\neversleep\\Desktop\\机器学习课设2\\future')\n",
    "\n",
    "sales_train = pd.read_csv(DATA_DIR / 'sales_train.csv')\n",
    "test = pd.read_csv(DATA_DIR / 'test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "how many lines in train set: (2935849, 6)\n",
      "unique items in train set: 21807\n",
      "unique shops in train set: 60\n",
      "how many lines in test set: (214200, 3)\n",
      "unique items in test set: 5100\n",
      "unique shops in test set: 42\n"
     ]
    }
   ],
   "source": [
    "# Gather (caption, value) pairs first and print them in order afterwards, so the\n",
    "# list of reported statistics is easy to scan and extend.\n",
    "dataset_overview = [\n",
    "    ('how many lines in train set:', sales_train.shape),\n",
    "    ('unique items in train set:', sales_train['item_id'].nunique()),\n",
    "    ('unique shops in train set:', sales_train['shop_id'].nunique()),\n",
    "    ('how many lines in test set:', test.shape),\n",
    "    ('unique items in test set:', test['item_id'].nunique()),\n",
    "    ('unique shops in test set:', test['shop_id'].nunique()),\n",
    "]\n",
    "for caption, stat in dataset_overview:\n",
    "    print(caption, stat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------head---------\n",
      "         date  date_block_num  shop_id  item_id  item_price  item_cnt_day\n",
      "0  02.01.2013               0       59    22154      999.00           1.0\n",
      "1  03.01.2013               0       25     2552      899.00           1.0\n",
      "2  05.01.2013               0       25     2552      899.00          -1.0\n",
      "3  06.01.2013               0       25     2554     1709.05           1.0\n",
      "4  15.01.2013               0       25     2555     1099.00           1.0\n",
      "------information------\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2935849 entries, 0 to 2935848\n",
      "Data columns (total 6 columns):\n",
      " #   Column          Dtype  \n",
      "---  ------          -----  \n",
      " 0   date            object \n",
      " 1   date_block_num  int64  \n",
      " 2   shop_id         int64  \n",
      " 3   item_id         int64  \n",
      " 4   item_price      float64\n",
      " 5   item_cnt_day    float64\n",
      "dtypes: float64(2), int64(3), object(1)\n",
      "memory usage: 134.4+ MB\n",
      "None\n",
      "-----missing value-----\n",
      "date              0\n",
      "date_block_num    0\n",
      "shop_id           0\n",
      "item_id           0\n",
      "item_price        0\n",
      "item_cnt_day      0\n",
      "dtype: int64\n",
      "--------nan value------\n",
      "date              0\n",
      "date_block_num    0\n",
      "shop_id           0\n",
      "item_id           0\n",
      "item_price        0\n",
      "item_cnt_day      0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print('----------head---------')\n",
    "print(sales_train.head(5))\n",
    "print('------information------')\n",
    "# DataFrame.info() writes its report to stdout itself and returns None, so it is\n",
    "# called directly; wrapping it in print() emitted an extra 'None' line.\n",
    "sales_train.info()\n",
    "print('-----missing value-----')\n",
    "print(sales_train.isnull().sum())\n",
    "print('--------nan value------')\n",
    "# isna() performs the same check as isnull(), so this section repeats the counts above.\n",
    "print(sales_train.isna().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>shop_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_cnt_month</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>31</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>486</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>787</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>794</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>968</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   shop_id  item_id  item_cnt_month\n",
       "0        2       31             1.0\n",
       "1        2      486             3.0\n",
       "2        2      787             1.0\n",
       "3        2      794             1.0\n",
       "4        2      968             1.0"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Month (date_block_num) used for the baseline.\n",
    "# NOTE(review): 33 looks like the most recent block in sales_train -- confirm against the data.\n",
    "BASELINE_MONTH = 33\n",
    "\n",
    "sales_train_subset = sales_train[sales_train['date_block_num'] == BASELINE_MONTH]\n",
    "\n",
    "# Total units sold per (shop_id, item_id) pair within that month; the summed\n",
    "# daily counts are exposed under the name item_cnt_month.\n",
    "grouped = (\n",
    "    sales_train_subset\n",
    "    .groupby(['shop_id', 'item_id'], as_index=False)['item_cnt_day']\n",
    "    .sum()\n",
    "    .rename(columns={'item_cnt_day': 'item_cnt_month'})\n",
    ")\n",
    "grouped.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   ID  shop_id  item_id  item_cnt_month\n",
      "0   0        5     5037             NaN\n",
      "1   1        5     5320             NaN\n",
      "2   2        5     5233             1.0\n",
      "3   3        5     5232             NaN\n",
      "4   4        5     5268             NaN\n",
      "   ID  shop_id  item_id  item_cnt_month\n",
      "0   0        5     5037             0.0\n",
      "1   1        5     5320             0.0\n",
      "2   2        5     5233             1.0\n",
      "3   3        5     5232             0.0\n",
      "4   4        5     5268             0.0\n"
     ]
    }
   ],
   "source": [
    "# Reload test.csv so this cell starts from the raw file contents.\n",
    "test = pd.read_csv(r'C:\\Users\\neversleep\\Desktop\\机器学习课设2\\future\\test.csv')\n",
    "\n",
    "# Attach the per-(shop_id, item_id) totals from `grouped` to every test row. The left\n",
    "# join keeps all test rows; pairs that are absent from `grouped` come back as NaN.\n",
    "# The merged frame gets its own name so `test` keeps its shop_id/item_id columns\n",
    "# instead of being replaced by the two-column submission frame.\n",
    "test_with_baseline = pd.merge(test, grouped, on=['shop_id', 'item_id'], how='left')\n",
    "assert len(test_with_baseline) == len(test), 'merge must not add or drop test rows'\n",
    "print(test_with_baseline.head())\n",
    "\n",
    "# Pairs without a match count as 0; values are then limited to the range [0, 20].\n",
    "test_with_baseline['item_cnt_month'] = test_with_baseline['item_cnt_month'].fillna(0).clip(0, 20)\n",
    "print(test_with_baseline.head())\n",
    "\n",
    "submission = test_with_baseline[['ID', 'item_cnt_month']].set_index('ID')\n",
    "submission.to_csv('submission_baseline.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2935849 entries, 0 to 2935848\n",
      "Data columns (total 6 columns):\n",
      " #   Column          Dtype  \n",
      "---  ------          -----  \n",
      " 0   date            object \n",
      " 1   date_block_num  int16  \n",
      " 2   shop_id         int16  \n",
      " 3   item_id         int16  \n",
      " 4   item_price      float32\n",
      " 5   item_cnt_day    float32\n",
      "dtypes: float32(2), int16(3), object(1)\n",
      "memory usage: 61.6+ MB\n"
     ]
    }
   ],
   "source": [
    "def downcast_dtypes(df):\n",
    "    \"\"\"Shrink the numeric columns of ``df`` in place to reduce memory use.\n",
    "\n",
    "    float64 columns are converted to float32. int64/int32 columns are converted\n",
    "    to int16 when every value fits in int16; otherwise to int32 when every value\n",
    "    fits in int32; otherwise they are left unchanged. Integer values therefore\n",
    "    never wrap around during the conversion.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame whose columns are converted. It is modified in place.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The same ``df`` object that was passed in.\n",
    "    \"\"\"\n",
    "    float_cols = [c for c in df if df[c].dtype == 'float64']\n",
    "    int_cols = [c for c in df if df[c].dtype in ['int64', 'int32']]\n",
    "    if float_cols:\n",
    "        df[float_cols] = df[float_cols].astype(np.float32)\n",
    "    for col in int_cols:\n",
    "        lowest, highest = df[col].min(), df[col].max()\n",
    "        # Try the narrowest type first and stop at the first one that can hold every value.\n",
    "        for target in (np.int16, np.int32):\n",
    "            limits = np.iinfo(target)\n",
    "            # An empty column has no values to check (min/max are NaN), so it takes the narrowest type.\n",
    "            if df[col].empty or (limits.min <= lowest and highest <= limits.max):\n",
    "                df[col] = df[col].astype(target)\n",
    "                break\n",
    "    return df\n",
    "\n",
    "# downcast_dtypes modifies the frame it receives and returns it; both names are\n",
    "# rebound to the returned frames, then the new dtypes and memory use are reported.\n",
    "sales_train, test = downcast_dtypes(sales_train), downcast_dtypes(test)\n",
    "sales_train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Monthly sales per item: one row per item_id and one column per date_block_num\n",
    "# (column labels converted to the strings '0', '1', ...), with 0 where an item has\n",
    "# no rows for a month. The aggregation is named by the string 'sum', and item_id is\n",
    "# restored as a regular column by reset_index() rather than by editing labels in place.\n",
    "sales_by_item_id = (\n",
    "    sales_train\n",
    "    .pivot_table(index='item_id', columns='date_block_num', values='item_cnt_day',\n",
    "                 aggfunc='sum', fill_value=0)\n",
    "    .rename(columns=str)          # month numbers -> string labels\n",
    "    .rename_axis(None, axis=1)    # drop the 'date_block_num' name of the column axis\n",
    "    .reset_index()                # item_id: index -> first column\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x15498449978>"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD8CAYAAACLrvgBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XeclNXZ8PHftbO9784WygILAtKLIGDsYAErJkZFE4kl5jHGWGIe9X3yqEmM0cToGxM1sRAxryVoEjWxgmCNhY50liK7sH3ZvrNtzvvH3LMMy5bZ2TL3DNf389nPzpy7zNlB55pTrnPEGINSSinlj4hgV0AppVTo0KChlFLKbxo0lFJK+U2DhlJKKb9p0FBKKeU3DRpKKaX8pkFDKaWU3zRoKKWU8psGDaWUUn6LDHYF+lpGRobJzc0NdjWUUiqkrF27tswYk9ndeWEXNHJzc1mzZk2wq6GUUiFFRL725zztnlJKKeU3DRpKKaX8pkFDKaWU38JuTEMpZS/Nzc0UFBTgcrmCXRUFxMbGkpOTQ1RUVEDXa9BQSvWrgoICkpKSyM3NRUSCXZ1jmjGG8vJyCgoKGDlyZED30O4ppVS/crlcOJ1ODRg2ICI4nc5etfo0aCil+p0GDPvo7b+FBg1lW8YY/r62gNrGlmBXRSll0aChbGt3aR0/eWUj/954MNhVUSFORPjud7/b9rylpYXMzEwuuOCCgO5XWVnJE0880fb8gw8+6PReZ5xxRlglHGvQULZVWNUAQFltY5BrokJdQkICmzdvpqHB89/U8uXLGTp0aMD3ax80jiXdBg0RWSIiJSKy2adsmoh8LiIbRGSNiMyyykVEHhORPBHZJCIn+FyzWER2WT+LfcpniMhX1jWPidXhJiLpIrLcOn+5iKT17Z+u7K6oyjNYV1bbFOSaqHCwYMEC3nzzTQBeeuklFi1a1HasoqKChQsXMmXKFObMmcOmTZsAuO+++7j22ms544wzGDVqFI899hgAd911F7t372batGn89Kc/BaC2tpZLL72UcePGcdVVV2GMOeL1n332WW677ba2508//TS33377Eee0trbyve99j0mTJjF58mQeffRR4MjWSllZGd719Z577jkWLlzIhRdeyMiRI/njH//II488wvTp05kzZw4VFRV99fa18WfK7XPAH4Hnfcp+A/zcGPO2iJxnPT8DWACMsX5mA08Cs0UkHbgXmAkYYK2IvGGMOWSdcwPwOfAWMB94G7gLeN8Y86CI3GU9v7NXf60KKcXVnqBRUadBI1z8/F9b2Hqwuk/vOWFIMvdeOLHb86644gp+8YtfcMEFF7Bp0yauvfZaPv74YwDuvfdepk+fzmuvvcbKlSu5+uqr2bBhAwDbt29n1apV1NTUcPzxx3PjjTfy4IMPsnnz5rZzPvjgA9avX8+WLVsYMmQIJ598Mp9++imnnHLKEa8/ZcoUfvOb3xAVFcVf/vIX/vznPx9Rxw0bNnDgwAE2b/Z8R6+srOz279q8eTPr16/H5XIxevRoHnroIdavX89tt93G888/z6233urfG+mnblsaxpiPgPbhygDJ1uMUwNvpfDHwvPH4HEgVkcHAucByY0yFFSiWA/OtY8nGmM+MJyw/Dyz0uddS6/FSn3J1jCiygkZ5nXZPqd6bMmUK+/bt46WXXuK888474tgnn3zSNuYxd+5cysvLqaqqAuD8888nJiaGjIwMsrKyKC4u7vD+s2bNIicnh4iICKZNm8a+ffuOOJ6QkMDcuXP597//zfbt22lubmby5MlHnDNq1Cj27NnDzTffzDvvvENycjLdOfPMM0lKSiIzM5OUlBQuvPBCACZPnnxUHfpCoMl9twLvisjDeALPN6zyoUC+z3kFVllX5QUdlANkG2MKAYwxhSKSFWBdVYgqqvIEi3Ltngob/rQI+tNFF13EHXfcwQcffEB5eXlbefuuJDg8
NTUmJqatzOFw0NLS8Ww+f867/vrreeCBBxg3bhzXXHPNUcfT0tLYuHEj7777Lo8//jjLli1jyZIlREZG4na7AY7KsfB93YiIiLbnERERnda1NwIdCL8RuM0YMwy4DXjWKu9oArAJoLxHROQGa2xlTWlpaU8vVzZVUuNtaWjQUH3j2muv5Z577jnqG/5pp53GCy+8AHi6mjIyMrr8lp+UlERNTU2PX3/27Nnk5+fz4osvHjGm4lVWVobb7eZb3/oWv/zlL1m3bh3g2fJh7dq1ALz66qs9ft2+FGhLYzFwi/X4FeAZ63EBMMznvBw8XVcFeMY8fMs/sMpzOjgfoFhEBlutjMFASWeVMcY8BTwFMHPmzB4HHWVP3oHwirom3G5DRIQmiKneycnJ4ZZbbjmq/L777uOaa65hypQpxMfHs3Tp0g6uPszpdHLyySczadIkFixYwPnnn+93HS677DI2bNhAWtrRc3sOHDjANddc09aq+PWvfw3AHXfcwWWXXcZf//pX5s6d6/dr9QtjTLc/QC6w2ef5NuAM6/E8YK31+Hw8g9gCzAG+tMrTgb1AmvWzF0i3jq22zhXr2vOs8t8Cd1mP7wJ+409dZ8yYYVToa25pNSPv+reZdM87ZsSd/zaH6hqDXSUVoK1btwa7CrZy/vnnmxUrVgS1Dh39mwBrjB+fsf5MuX0J+Aw4XkQKROQ64PvA70RkI/AAntlP4Jn9tAfIA54GfmgFpgrgl1aAWA38wioDT1fXM9Y1u63AAfAgcLaI7ALOtp6rY0RpbSNuA+OHeLoIdNqtCnWVlZWMHTuWuLg45s2bF+zqBKzb7iljzNEdbx4zOjjXADd1cp8lwJIOytcAkzooL8fTilHHIG/X1ITByXy5t4Ly2kZGZyUGuVZKBS41NZWdO3cGuxq9phnhypa8ORoTrZaG5mqENtPB7CQVHL39t9CgoWzJ29KYOCQFgDINGiErNjaW8vJyDRw2YKz9NGJjYwO+h27CpGypqLqRKIdwXFYCABU6phGycnJyKCgoQKfD24N3575AadBQtlRc7SIrKZaYSAcpcVGaFR7CoqKiAt4lTtmPdk8pWyqqcjEoxdOEdiZEa1a4UjahQUPZUnGNi0HJVtBIjNaWhlI2oUFD2VJxlYusZM8aOuna0lDKNjRoKNupcTVT19Tq09KI0Sm3StmEBg1lO94cDe+YRkZCNBX1TbS6dcqmUsGmQUPZjndJ9GyrpZGeEI0xcKheWxtKBZsGDWU73s2XfLunQLPClbIDDRrKdtp3TzkTogEoq9UZVEoFmwYNZTtFVS5S4qKIjXIA2tIIReW1jfx708HuT1QhR4OGsp2i6sM5GuDJ0wDd9jWUvLw6nx+9uJ5DGujDjgYNZTvF1S6yUw4HjbT4aEQ8315VaDhQ2QB4kjRVeNGgoWynqMrFICuxD8ARIaTFR+te4SGk0AoaJdUa6MONBg1lKy2tbspqG9um23ppVnhoKbSWti+p0aARbjRoKFspq23CbTgqaDgTonUgPIR4p02XaPdU2NGgoWylfY6GV0ZiDGW6aGFIaGhqpbK+GYBSbWmEHQ0ayla8O/YNStHuqVDlDfyg3VPhSIOGshVvYt9R3VOJ0VQ1NNPc6g5GtVQPeAfBIyOEUh0IDzsaNJStFFW7iHJIWxa4l/e5zvu3P+8g+LjBSTqmEYY0aChbKa7ybPMaESFHlHuzwnXarf15u6em5KRq91QY0qChbKWo2kW2T46Gl7eloeMa9ldY1UBqfBTD0+Opb2qltrEl2FVSfajboCEiS0SkREQ2tyu/WUR2iMgWEfmNT/ndIpJnHTvXp3y+VZYnInf5lI8UkS9EZJeI/E1Eoq3yGOt5nnU8ty/+YGVvRdWuowbBwWcpEZ1BZXtFVS4Gp8SRleQJ/jqDKrz409J4DpjvWyAiZwIXA1OMMROBh63yCcAVwETrmidExCEiDuBxYAEwAVhknQvwEPCoMWYMcAi4ziq/DjhkjBkNPGqd
p8JccZXrqEFwAGeC1T2lLQ3bK6xyMTgllqwkz79jSbWOa4STboOGMeYjoKJd8Y3Ag8aYRuucEqv8YuBlY0yjMWYvkAfMsn7yjDF7jDFNwMvAxSIiwFzgVev6pcBCn3sttR6/Csyzzldhqrax5YhtXn2lxEXhiBBtaYSAwipPa9G7x7uOa4SXQMc0xgKnWt1GH4rIiVb5UCDf57wCq6yzcidQaYxpaVd+xL2s41XW+SpMeXM0OmppRFjrT2lWuL25mlupqGticHIsmYkaNMJRZC+uSwPmACcCy0RkFNBRS8DQcXAyXZxPN8eOICI3ADcADB8+vMuKK/vqLEfDKyMxmjLtnrI13w20UuOjiHZE6LTbMBNoS6MA+Ifx+BJwAxlW+TCf83KAg12UlwGpIhLZrhzfa6zjKRzdTQaAMeYpY8xMY8zMzMzMAP8kFWydZYN7ebLC9VurnXlzNAanxCEiZCbFaIJfmAk0aLyGZywCERkLROMJAG8AV1gzn0YCY4AvgdXAGGumVDSewfI3jDEGWAVcat13MfC69fgN6znW8ZXW+SpMdbbulJczMUa7p2zOG/gHp3r+DTOTYijVQB9Wuu2eEpGXgDOADBEpAO4FlgBLrGm4TcBi6wN9i4gsA7YCLcBNxphW6z4/At4FHMASY8wW6yXuBF4WkfuB9cCzVvmzwF9FJA9PC+OKPvh7lY0VV7tIjo0kLtrR4XGnrj9lewerPEuIeAN/VlIMX5fXB7NKqo91GzSMMYs6OfSdTs7/FfCrDsrfAt7qoHwPntlV7ctdwLe7q58KH0VVHedoeDkToqlpbKGxpZWYyI4DiwquoipP4E+I8Xy0ZCXHsHpfh73KKkRpRriyjeLqjnM0vLxLiWgXlX0VWol9XpmJsRyqb6apRReaDBcaNJRtFFW7Oh3PAM9AOGiCn521by16czV0XCN8aNBQttDS6qa0prHL7qmMtqVENGjYVWGViyGpPkHDWkpEs8LDhwYNZQudbfPq63BLQ7+12lFjSytltY0MSj7cPeVdSkTXnwofGjSULRR3M90WdEzD7kqsfIzBHXRPaVZ4+NCgoWyhqJtscIDk2EiiHKJZ4TZV2EFypjMhGhENGuFEg4ayhbYlRFKO3kvDS0Q0K9zGCq0cDd+WRqQjAmdCNKW6lEjY0KChbKGoykVkhJCR0HnQAM8S6do9ZU+Hs8HjjijPTIpt67pSoU+DhrKFomoXWUkxR23z2p4zMZoyDRq2VFjlIikmksSYI3OGs5JitHsqjGjQULZQXO0iu4vptl7OhGgqdE8NWyqsauhwynRWUozOngojGjSULRRVdZ3Y5+VMjNHkPpvqbBmYrOQYymobcbt1vdFwoEFD2UJxdWOXM6e80hOiqW9qpaGpdQBqpXrCu81re1lJsbS4DRX1GuzDgQYNFXS1jS3UNrZ0mQ3udTgrXLs77KS51U1pbSODUuKOOpbZlhWu/2bhQIOGCrq2zZf8aml4PoC0i8peSmoaMQaGdDKm4TlHp92GAw0aKuhK/Ejs83JaLQ2ddmsvhZXWPhqddE+BJviFCw0afayirolzH/2IDfmVwa5KyCiq7nqbV1/ePI4yTfCzFd9tXttrW+lWg0ZY0KDRx1ZsK2ZHcQ2f7S4PdlVCxuElRLpO7ANI15aGLXW1v3tslIOk2EgNGmFCg0YfW7W9BICvy+uCXJPQUVzlIik2kvjobjeSJCHaQUxkhC6PbjOFVS7iox0kx3b8b+hJ8NMxjXCgQaMPNbW4+XhXGQD7NGj4rbvNl3yJCM6EaO2espmi6gYGp8Qi0nFGf2ZSjM6eChMaNPrQ6n0V1Da2kJEYw9fl9cGuTsgoqu5686X2nIm6/pTdHKx0dTie4ZWVFKsD4WFCg0YfWrm9hOjICC6dkUNhlQtXsyag+aO4quu9wdtzJkbrlFub6Swb3MvbPWWMZoWHOg0afWjV9hLmjHIyYUgyAPsrtLXRnVa38SSF9SBopCdEa0vDRlpa3ZTU
dJwN7pWVHIOr2U1tY8sA1kz1Bw0afWRvWR17yuqYe3wmuc54APaV6bhGd8pqG2l1G78WK/TKSPSsZaTfWu2htLYRt+l6yrTmaoQPDRp9ZKU1a2ruuGxGpCcA6LiGH3qSDe6VnhBNY4ubOl1/yha8ORpDuhzT0KVEwkW3QUNElohIiYhs7uDYHSJiRCTDei4i8piI5InIJhE5wefcxSKyy/pZ7FM+Q0S+sq55TKzpFyKSLiLLrfOXi0ha3/zJ/WPV9hJGZyUy3BlPSnwUafFROoPKD0V+7A3enjPBytXQcQ1bKKzsPjkzU5cSCRv+tDSeA+a3LxSRYcDZwH6f4gXAGOvnBuBJ69x04F5gNjALuNcnCDxpneu9zvtadwHvG2PGAO9bz22ptrGFL/aWM3dcVlvZCGeCtjT8UOLHNq/tZSRaWeG6aKEtdLTNa3ve7ilN8At93QYNY8xHQEUHhx4F/hvw7Vi+GHjeeHwOpIrIYOBcYLkxpsIYcwhYDsy3jiUbYz4zng7q54GFPvdaaj1e6lNuO5/sKqW51RwRNHKd8drS8ENRtQtHhODsZptXX+na0rCVoioXsVERpMRFdXpOclwk0ZEROqYRBgIa0xCRi4ADxpiN7Q4NBfJ9nhdYZV2VF3RQDpBtjCkEsH5nYVMrt5eQFBvJjBGHe9BGOBM4WNlAY4v2u3elqKqRrKQYHN1s8+rLqcuj20phtSdHo7PEPvAkZeoOfuGhx0FDROKB/wHu6ehwB2UmgPKe1ukGEVkjImtKS0t7enmvuN2GldtLOX1sJlGOw2/nCGc8bgMFhxoGtD6hpri6ZzkaQFurpExbGrbg766LupRIeAikpXEcMBLYKCL7gBxgnYgMwtNSGOZzbg5wsJvynA7KAYqt7ius3yWdVcgY85QxZqYxZmZmZmYAf1LgNh+soqy28YiuKfC0NAD267hGl3qyhIhXXLSD+GiH5mrYRGFlA4NT/QkasTp7Kgz0OGgYY74yxmQZY3KNMbl4PvhPMMYUAW8AV1uzqOYAVVbX0rvAOSKSZg2AnwO8ax2rEZE51qypq4HXrZd6A/DOslrsU24rK7eXIAKnjz0yWLXlaui4RpeKu8kk7ownK1w/gIKt1W0ormnschDcKzMpRsc0woA/U25fAj4DjheRAhG5rovT3wL2AHnA08APAYwxFcAvgdXWzy+sMoAbgWesa3YDb1vlDwJni8guPLO0HuzZnzYwVm4vYfqwVJyJRw7kpidEkxQTqTOoulDX2EJNY0uPu6fAs4OfrnQbfN7kzI62eW0vKymGqoZmXV4nxHW7FrUxZlE3x3N9Hhvgpk7OWwIs6aB8DTCpg/JyYF539QumkhoXmwqquOOcsUcdExFGZOgMqq4c3nzJ/5lTXhkJ0W1JZSp42jZf8mdMw2czpmHp8f1aL9V/NCO8Fz7Y4Rl0P3NcxxO7NFeja8VV/m/z2l56QrTOnrKBoqrOt3ltry1XQ7sVQ5oGjV5Yua2EQcmxTBic3OHxXGc8+RX1tLS6B7hmoSGQbHAv7/Louv5UcLUtIZLaffdUpi4lEhY0aASoqcXNJ3llnDkuq9P56SOcCbS4DQcrtRulI8XWh0cgA+EZidE0txqqXbpqajAVVrmIjowgLb7zxD6vw91T+v9DKNOgESDvhkvtp9r6yrWm3eq4RseKq/3f5rW9tqxwHQwPqsIqV5c79vlyJsQQIbrSbajToBGg97d5Nlw6ebSz03O8027Dab/wyvqmPtsToaiHmy/58s5W02m3wVVU1eB396IjQnAm6ravoU6DRoBW7SjhpFHOLr8lZybFEBflYF+YDIZX1Tdz3u8/ZsHvP+JgZe8z3QNJ7PPyrnSrWeHB5W1p+EuzwkOfBo0A7CmtZW9ZXZddU2BNu3XGh01L439f30xJTSOVdc1c9cwXbSvUBiqQJUS8vOtPafdU8LjdhuJqF4P9GAT3ykqK0dlTIU6DhqWoysVeP3faO7zhUvdrKI5wxodFS+P1
DQd4Y+NBbj1rDM9dO4viahdXPvMFZQF+ALS6DSU1jQHlaMDhMQ3tngqesrpGmltND1saupRIqNOgYXnone2c++hHPPLejm4zVlftKGFMVqJfCUq5zgT2l9fT6g7dqaEHKxv439c2c8LwVP7r9OOYMSKNJd87kYJD9XznmS+orO/5t/1ybyZxgC2NmEgHSTGRmhUeRIHsupiVHNOWRa5CkwYNy90LxrFg8iAeW5nHWY98yIqtxR2eV+Nq5su9Fcwd799K7SOcCTS1uttyEkKN22346asbaXEbHr18GpHWSr5zRjl5+uqZ7Cmr47vPfkm1q7lH9/W+H4F2T4G1/pQGjaBpywb3YwkRr8ykGNxGl7UPZRo0LFnJsfz+ium8+P3ZxEY5uP75NVy/dDX5FUd2LX2yq8yz4dLx/gWNUJ9B9Zf/7OPTvHLuuWBC28q9XqeOyeTJq05ge1E131vypd+zqqpdzby+wbOYcSA5Gl7OxBjtngqitpZGDwfCQRP8QpkGjXa+cVwGb/34VO5eMI7/7C7nrEc+5A/v72rbTGnl9hKS22241JURGZ4P2lBcTmRncQ0PvbOds8Znc/mJwzo8Z974bP6waDobC6q47rnVNDR13rW3v7yen/9rC9/49Uqe/WQvp47J4PhBSQHXLz0hWgfCg6iwykW0I6JtJps/MnXb15DX86yqY0B0ZAQ/OP04Lpo2hPv/vY3fLd/J39cVcN9FE1m1o5TTj89q66bpzuDkWKIjI0Iuwa+pxc2tL28gKSaSB781ucvkrfmTBvPo5YZbX17PDX9dw9NXzyQ2ygGAMYYv91bw7Cd7Wb6tGIcIF0wZzLWnjGRKTmqv6piRGM36/ZW9uocKXGFVA9kpMUT0YNfFtpaGTrsNWRo0ujA4JY7HrzqBy3eWcu8bW/jeX1YDMHec/xs9RUQIw9Pj+bostFoaj67YydbCap65eiYZid3PcLpo6hCaWtzc8cpGfvjCOv6waDrvbiliyad72XygmtT4KH54xnFcfVJur8YxfKUnRHOovgm32/Tog0v1jcIqF4OT/R/PgMPrT2lLI3Rp0PDDaWMzeefWU3nm4718sKOEucdn9+j6XGdoLZH+5d4K/vThbhbNGsZZE/z/Wy+dkUNjSyv/88/NnPDL5TS2uBmdlcgDl0zmkulDiYt29Gk9nQkxtLoNVQ3NpPWgi0T1jaIqF9OG9ay1GBvlICUuSpcSCWEaNPwUE+ngpjNHc9OZo3t87QhnAp/mlWOM8WuNnmCqcTVz+7INDE+P52fnT+jx9VfNHoEgfLCjhCtnD+e0MZn91grwJviV1zVq0BhgxhiKqlwMntTzVmNWki4lEso0aAyAXGc8Dc2tlNY0ktVHXTP95ef/2srBygZe+a9vkBAT2H8eV84ezpWzh/dxzY7mTPCuP9XEaP8ms6k+UlHXRFOrO6DZb5m6lEhI09lTA2BE22q3fT+u4XabHudIdOadzYW8uraAm84c7ffssGA63NLQGVQDLZAcDa8s3Ss8pGnQGAAjrFyN/hjX+Mt/9jHngffZXVrbq/uU1jRy9z++YvLQFH48b0wf1a5/ead6atAYeIeDRgDdU8mxlNQ06gZaIUqDxgAYmhpHZIT0S4LfvzcdpL6plZ++sjHgpRmMMfyff35FXVMrj14+lSg/pxMHW5quPxU03m1eAwoaSTE0tbh1A60QFRqfDiEu0hFBTlpcn3dPldS42JBfydScFNbtr2TJJ3sDus/rGw6yfGsxd5wzltFZgSfbDbQoRwSp8VGU6/LoA66wykWktT9GTx2edqvjGqFIg8YAGeFM6POWxqrtJRgDD3xzMmdPyObh93b0uJuquNrFPa9vZsaINK47ZVSf1m8gaFZ4cHg30HIEMDMuy8oK1xlUoUmDxgDJdXoS/PqyH3f51hKGpsYxYXAyv1o4idgoR4+6qYwx3P2Pr2hqdfPwt6cG9AEQbBkJMQEvz64C19PNl3xltmWF679bKNKgMUBGOBOo
aWzhUH3fzHRqaGrlk7xSzhqfhYiQlRzLzy+a2KNuqlfWFrByewl3zh/HyIyE7i+wIW1pBEdhVUPAi01mJetSIqGs26AhIktEpERENvuU/VZEtovIJhH5p4ik+hy7W0TyRGSHiJzrUz7fKssTkbt8ykeKyBcisktE/iYi0VZ5jPU8zzqe21d/dDDkZvTtDKpP88pwNbuPyNi+eNoQzhrvXzfVwcoGfvmvrcwamc7ik3L7pE7BoMujDzxjTK9aGkkxkcRGRWj3VIjyp6XxHDC/XdlyYJIxZgqwE7gbQEQmAFcAE61rnhARh4g4gMeBBcAEYJF1LsBDwKPGmDHAIeA6q/w64JAxZjTwqHVeyPLmavTVuMaKbcUkxkQye6SzrUxEeOCS7rupjDHc+fdNtBrDw5dODel1m5yJMRyqb9JNfQZQZX0zjS1uBgWQowGe/06zkmJ129cQ1W3QMMZ8BFS0K3vPGOOdL/c5kGM9vhh42RjTaIzZC+QBs6yfPGPMHmNME/AycLF41tSYC7xqXb8UWOhzr6XW41eBeWL3NTi6kJMWR4TAvj5YuNDtNqzYVsLpx2cSHXnkP6E/3VQvfZnPx7vKuPu88Qx3dr/7oJ05E6IxBg4FsHugCkxvcjS8dCmR0NUXYxrXAm9bj4cC+T7HCqyyzsqdQKVPAPKWH3Ev63iVdf5RROQGEVkjImtKS0t7/Qf1h5hIB0NS4/qkpbGxoJKy2kbOHt/xYoJddVPlV9Tzqze3cvJoJ1fN6v+lPvpbW1a4TrsdMEXVgedoeGUl61IioapXQUNE/gdoAV7wFnVwmgmgvKt7HV1ozFPGmJnGmJmZmf4vWz7Qcp0JfZKrsWJbMY4I4YzjO/5bO+umcrsN//3qJkSEh741JaS7pbzSEw4vWqgGxsHKwJcQ8cpM1KVEQlXAQUNEFgMXAFeZw/NICwDfLd5ygINdlJcBqSIS2a78iHtZx1No100WaoY74/ukpfH+thJOzE0jNb7zlV2zkmO576IJR3RT/b8vvuazPeX87Pzx5KSFdreUl3evD21pDJyiKheOCGmbOhuIrORYalwtuJo73+lR2VNAQUNE5gN3AhcZY3y/Or8BXGHNfBoJjAG+BFYDY6yZUtF4BsvfsILNKuB/OxZEAAAcw0lEQVRS6/rFwOs+91psPb4UWGlCfLGaXGc8h+qbqerFtNv8inq2F9VwViddU74WThva1k21ansJv35rO6ePzex069ZQ5NSlRAbcgcoGspJiepXXk6l7hYcsf6bcvgR8BhwvIgUich3wRyAJWC4iG0TkTwDGmC3AMmAr8A5wkzGm1RqT+BHwLrANWGadC57gc7uI5OEZs3jWKn8WcFrltwNt03RDVdsMqorAWxsrthUDcLYfmyP5dlNd89xqIh3S7datoSY1PhoRNFdjgJTUuHhncxGzRqb36j7ebV9La3VcI9R0u2GCMWZRB8XPdlDmPf9XwK86KH8LeKuD8j14Zle1L3cB3+6ufqEk12eJ9ED3x16xrZgxWYltAag7Wcmx/OLiidz6tw3cd+HEXvVD25EjQshKiuHT3eX8uNXt997tKjCPvb+L5lY3t501tlf30aVEQpf+HzaAhqd7xhG+LguspVHV0MwXeyp6tAUrwMXThrL+f8/mWzNyuj85BP303HGs/foQv31vR7CrEtb2ltXx0pf5LJo1nNxeriBwOCtcg0ao0aAxgOKiHQxKjg14BtWHO0tpcRu/xjPa62rQPNRdOiOHK2cP588f7uGdzUUB32d3aS3L1uTrPg+dePi9HcRERnDzvJ5vedxeenw0jgjRabchSLd7HWAjejGDasXWYjISo5k2LLCurXB274UT2HKgijte2cjY7ERGZSb26PodRTVc+fTnlNc1MW1YKmOzQ2eJ+IGwMb+SNzcV8uO5o9u6lnojIkLISIzW7qkQpC2NARZorkZzq5tVO0qYOy4rJFej7W8xkQ6e+M4MohzCjf9vHfVN/m/w4w0Y3gkC
3skGysMYw4Nvbyc9IZrvn9Z3y+dnJcVq91QI0qAxwEZkxFNW20hdY892LVu9t4IaV0tAXVPHiqGpcfz+iunsLKnh7n985Vc3kzdgRDqEZT+Yw6ShyazcVjIAtQ0dH+0q47M95dw8dzRJsVF9dt+spBhKNWiEHA0aAyy3beHCnrU2lm8rJiYyglPGZPRHtcLGaWMzuf2ssby+4SDPf/Z1l+f6BoyXvj+HUZmJzB2Xzbr9h3QKr8XtNjz09naGpcdx5ey+XXbGs5SIBo1Qo0FjgI2wFgjsybiGMYYV24o5ZXQG8dE6DNWdm84czbxxWdz/5lbWfn2ow3N2Fh8dMADOGp+F23h2RVTwr00H2VpYzU/OPp6YSEef3jszKZbyukZaWt19el/VvzRoDLARPrka/tpZXEt+RUOPp9oeqyIihEcum8bglDhuemHdUTv77SyuYdFTRwcMgElDUshMimGlBg2aWtw8/N4Oxg9O5qKpQ/r8/plJMRiD7ocSYjRoDLDEmEgyEqN71NLwDszOG5fVX9UKOynxUTz5nRM4VN/EzS+ub/s221XAAE/AmTcuiw93ltLUcmx/A37xi6/Jr2jgrgXj+mVxyyxdSiQkadAIghHOhB7t4Ld8azFTc1LISu79VMdjycQhKdy/cBKf7Snn4fd2dhswvOaNz6a2sYXV+0J6fcxeqXE189jKPE4a5eS0fhpHG5rqWZ1gf0XvV35WA0eDRhB4cjX8+x+lpMbFhvxKnTUVoG/PHMaiWcP504e7ufTJ/3QbMABOHu0kOjLimJ56+/THe6moa+KuBeP6ba2ysdlJRDsi2FRQ2S/3V/1Dg0YQ5DoTKKxy+bUstHf6p45nBO7eCycwdVgqcdGObgMGQHx0JCcf5+T9bSXHZHZ4aU0jz3y8h/MnD2ZqPyaSRkdGMH5IMhs1aIQUDRpB4J1B5U+zfMW2YoamxjFukGYoByo2ysErPziJD+440+9M8bnjs9lfUX/UzofHgj+s3EVji5ufnNO7RQn9MTUnha8KqnSP9xCiQSMI2la77WbhwoamVj7eVcbZE7LDajnzYIiOjCAu2v8po95JByuOsUS/fWV1vPjFfq44cViPl2IJxNScVOqaWo/J4ByqNGgEgTdorN5XQV5JDQWH6im3ssR9v3F9kldGY4tbxzOCYEhqHBMGH3vZ4b99bwdRjghumTdmQF7P2/21MV+7qEKFZooFQUp8FFlJMTz98V6e/njvUcejIyOIi3LQ0uomKSay1xveqMDMG5/F46vyOFTXRFpC+K4S7LViazFvbirklnljBmym3qiMBJJiItlYUMm3Z4bPjpLhTINGkCz7wUnkldTS0NxKQ3MrruZWGppaDz+3Hs8a6ZnJowbevPHZ/GFlHh/uLGXh9KHBrk6/qqhr4q5/fMX4wcncdGbvlz73V0SEMDknhY35VQP2mqp3NGgESW5GQq83slH9a8rQFDISY1ixrTisg4Yxhp+99hVVDU389bpZA/4lZUpOKs9+sgdXcyuxUX27VInqe/oVVqlOREQIc8dl8uHOUprDeH2kNzYe5K2virjt7LGMH5w84K8/bVgKza2GbYXVA/7aquc0aCjVhbnjsqlxhW92eHG1i3te38L04anccGrf7ZXRE97B8E0F2kUVCjRoKNWFU8dkEO2I4P0wnEVljOG/X91EY0srj1w2jUhHcD4OBiXHkpkUozOoQoQGDaW6kBATyUnHOcNy1duXvsznw52l3L1gPCODOL4mIkzNSWWDZoaHBA0aSnVj3vgs9pbV2TYBbVNBJd984lPe/qrQ72VP9pfXc/+bWzl5tJPvzhnRzzXs3tScFPaU1lHtag52VVQ3NGgo1Y25Vna4XRP9HnpnO+v2V3LjC+v4/vNrOFDZ0OX5brfhjlc34hDhN5dO7Zdlz3vKO67xlY5r2F63QUNElohIiYhs9ilLF5HlIrLL+p1mlYuIPCYieSKySURO8LlmsXX+LhFZ7FM+Q0S+sq55TKz1
Mjp7DaUGWk5aPOMGJdly1du1X1fwaV45dy0Yx8/OH8+neeWc/ciHPPPxnk53xFvy6V6+3FvBPRdOaFuePNim5KQA6OKFIcCflsZzwPx2ZXcB7xtjxgDvW88BFgBjrJ8bgCfBEwCAe4HZwCzgXp8g8KR1rve6+d28hlIDbt74LNZ8fYiqent1nzz2fh7pCdFcfdIIrj91FMtvP42TRjm5/81tXPz4p0ctO76ruIbfvLuDs8Znc+mMnCDV+mip8dHkOuN1MDwEdBs0jDEfAe3nG14MLLUeLwUW+pQ/bzw+B1JFZDBwLrDcGFNhjDkELAfmW8eSjTGfGU9n7PPt7tXRayg14OaOy6bVbfhgp326qDbkV/LhzlK+f+qotr3jc9LieWbxTJ686gRKaxpZ+Pin3PfGFmobW2hudfOTVzaSEO3g19+cbLtFMKcOS9XM8BAQ6JhGtjGmEMD67d2HdCiQ73NegVXWVXlBB+VdvcZRROQGEVkjImtKS0sD/JOU6ty0Yak4E6JtNfX2jyt3kRofxXdPOnIgW0RYMHkwK35yOlfNHsHSz/Zx1u8+5PZlG9lUUMWvLplMprXVqp1MyUmlqNpFcbUr2FVRXejrgfCOvrqYAMp7xBjzlDFmpjFmZmZmZk8vV6pbjgjhzHFZfLCjpNOxgoG0+UAVK7aVcN3JI0mM6Xg1oOTYKH65cBJ/v/EbpMZH8a+NB7l42hDOmzx4gGvrn2nDrHEN7aKytUCDRrHVtYT12/v1qwDwXaoyBzjYTXlOB+VdvYZSQTFvXBbVrhbWfH0o2FXhjyvzSIqNZPHJud2ee8LwNP518yk89d0Z/Pqbk/u/cgGaOCQFR4ToYLjNBbpg4RvAYuBB6/frPuU/EpGX8Qx6VxljCkXkXeABn8Hvc4C7jTEVIlIjInOAL4CrgT908xpKBcWpYzOJcggrt5cwZ5TzqOOV9U2s31/J+v2H2HSgCmMgJS6K5LhIkmOjrMfWb+v5mOzEHi/St6Oohne2FPHjuaNJjo3y65ooRwTnTBzUo9cZaLFRDo7PTtLlRGyu26AhIi8BZwAZIlKAZxbUg8AyEbkO2A982zr9LeA8IA+oB64BsILDL4HV1nm/MMZ4B9dvxDNDKw542/qhi9dQKigSYyKZM8rJim3F/Pe5x7OjuIb1+ytZt/8QG/ZXssfaiTFCYGx2EjGREXxdXke1q4WqhuYOtzQdm53Ish+cRGq8//t1/GHlLhKiHVx7ysg++9vsYuqwVN7cdBBjjO0G6pVHt0HDGLOok0PzOjjXADd1cp8lwJIOytcAkzooL+/oNZQKpnnjsrjvX1uZfN97NDS3ApCRGM304WlcOjOH6cPSmJKTQkK7cQZjDPVNrVQ1NFPtaqa6oYW9ZbX872tbuPa51bxw/Ry/tqPNK6nlza8K+a/Tj+tRoAkVU3NSeOnL/ewrrw/q0iaqc7qfhlI9cOHUIXySV05OWhzTh6dywvA0ctLiuv1WLCIkxESSEBPJEDwJdbNGppMcG8UPX1zHD19Yy1NXzySqm0UDH1+VR2ykg+vDsJUBR27/qkHDnnQZEaV6wJkYwzOLZ3LfRRO5eNpQhqXH96obZcHkwdy/cBKrdpRy56ubcHfQheW1r6yO1zcc4DtzhuNMtN+U2b4wJiuRuCgHG3QGlW1pS0OpILtq9gjKa5t4ZPlOnInR/M/5Ezo87/FVeUQ5Ivj+acHZ92IgRDoimDQ0+ahMdmUf2tJQygZunjuaxSeN4OmP9/LnD3cfdTy/op5/rj/AolnDyUqKDUINB87UnFQ2H6wO690SQ5kGDaVsQES498KJXDBlML9+ezuvrMk/4vgTH+wmQoT/Ov24INVw4EwZlkpTi5sdRTXBrorqgAYNpWwiIkJ45LJpnDomg7v+8RUrtnpW1T1Y2cCra/O57MQcBqWEdysDYFqONRiuXVS2pEFDKRuJjozgye/MYOKQZG56cR2r91Xwpw93YwzHRCsDYFh6HGnxUbqciE1p
0FDKZhJjIvnL905kaGoc1z23mpdX53PpjBxy0uKDXbUBISJMyUnVzHCb0qChlA05E2N4/rpZxEU7aHUbfnjG6GBXaUBNHZbKzuIa6hpbgl0V1Y5OuVXKpnLS4vn7jd8gv6KB4c5jo5XhNTUnBbfxrOY7u4N1vlTwaEtDKRvLSYvnpOOOvQ/NKdZguHZR2Y8GDaWU7WQmxTA0NY4NOoPKdjRoKKVsaeqwFM0MtyENGkopW5qak0p+RQPltY3BroryoUFDKWVLOq5hTxo0lFK2NDknBRHNDLcbDRpKKVtKjIlkTFaiZobbjAYNpZRtTclJZWNBFZ5NQZUdaNBQStnW1GGpVNQ1UXCoIdhVURYNGkop25qakwLAf3aXBbkmykuDhlLKtsYNSmZQcix3/v0rvvnEpyxbna/rUQWZhFtf4cyZM82aNWuCXQ2lVB8pr23kH+sO8PLq/ewurSMh2sGFU4dw+YnDmDYstVd7tKvDRGStMWZmt+dp0FBKhQJjDOv2H+LlL/P596ZCGppbOT47ictPHMYl04eSlhAd7CqGNA0aSqmwVeNq5l8bC/nbmnw25lcS7Yhg0axh3H3eeGKjHMGuXkjyN2j0akxDRG4TkS0isllEXhKRWBEZKSJfiMguEfmbiERb58ZYz/Os47k+97nbKt8hIuf6lM+3yvJE5K7e1FUpFT6SYqO4cvZwXr/pZN6+5VS+NSOHpZ99zcLHP2V3aW2wqxfWAg4aIjIU+DEw0xgzCXAAVwAPAY8aY8YAh4DrrEuuAw4ZY0YDj1rnISITrOsmAvOBJ0TEISIO4HFgATABWGSdq5RSbcYPTubX35zMX645kZKaRi78wye8tv5AsKsVtno7eyoSiBORSCAeKATmAq9ax5cCC63HF1vPsY7PE88I1sXAy8aYRmPMXiAPmGX95Blj9hhjmoCXrXOVUuooZx6fxVs/PpVJQ1K49W8buPPVTTQ0tQa7WmEn4KBhjDkAPAzsxxMsqoC1QKUxxjsnrgAYaj0eCuRb17ZY5zt9y9td01n5UUTkBhFZIyJrSktLA/2TlFIhblBKLC9+fzY3nXkcy9bms/DxT8krqQl2tcJKb7qn0vB88x8JDAES8HQltecdae9oXpwJoPzoQmOeMsbMNMbMzMzM7K7qSqkwFumI4KfnjmPpNbMoq23kwj98yqtrC4JdrbDRm+6ps4C9xphSY0wz8A/gG0Cq1V0FkAMctB4XAMMArOMpQIVvebtrOitXSqlunTY2k7duOZWpw1K445WN3PHKRuqbNDGwt3oTNPYDc0Qk3hqbmAdsBVYBl1rnLAZetx6/YT3HOr7SeOb7vgFcYc2uGgmMAb4EVgNjrNlY0XgGy9/oRX2VUseY7ORYXrh+Dj+eN4a/ryvgksf/Q0mNK9jVCmm9GdP4As+A9jrgK+teTwF3AreLSB6eMYtnrUueBZxW+e3AXdZ9tgDL8AScd4CbjDGt1rjHj4B3gW3AMutcpZTymyNCuP3ssSy9Zhb5h+pZ9NTnlFRr4AiUJvcppY4ZX+6t4Jq/fEl2ciwvfn8Og1Jig10l2xiQ5D6llAols0ams/TaWZTUNHLFU59RWKVLrveUBg2l1DFlZm46z183i/LaJi7/8+ccqNTA0RMaNJRSx5wThqfx1+tnc6i+icv//Bn5FfXBrlLI0KChlDomTRuWygvXz6a6oZkrnvpcA4efNGgopY5ZU3JSefH7c6hrauHyP3/G1+V1wa6S7WnQUEod0yYNTeHF6+fQ0NzK5X/+nL1l9goca/ZV8OOX1rNsTT6H6pqCXR2dcquUUgDbi6q56ukviHQIb/zoFLKTgz8d9+vyOi5+/FNqXS20uA2OCGH2yHTOnTiIcyZmMzglrs9eSzdhUkqpHtpeVM3Cxz9l9kgnz11zYlC3kq1xNXPJE/+hrLaR1354MrWNLbyzuYh3thSRV+LZM2TqsFTmTxzEuROzGZWZ2KvX06ChlFIBeP6zfdzz+hYeuGQyV84e
HpQ6tLoN1y9dzce7ynj+ull847iMI47nldTy7pYi3ttSxMaCKgDGZidy/8LJzBqZHtBr+hs0Irs7QSmljiXfmT2C97YUc/+bWzl5tJMRzoQBr8ND72xn1Y5S7l846aiAATA6K5HRWaO56czRHKhs4L0tRby7pQhnYv/vk64D4Uop5SMiQvjNpVNwRAh3vLKRVvfA9sa8siafpz7aw9UnjeA7c0Z0e/7Q1DiuOXkkL99wEsf1sovKHxo0lFKqnSGpcfz8ooms3neIZz7eM2Cvu2ZfBf/zz82cMjqDey6w5+7WGjSUUqoDl0wfyrkTs/ndezvZUdT/u/8VHKrnB39dy9C0OB6/8gQiHfb8eLZnrZRSKshEhAcumUxSbCS3L9tAU4u7316rrrGF65euoanVzdNXzyQlPqrfXqu3NGgopVQnnIkx/Pqbk9lysJo/rNzVL6/hdhtu+9sGdhbX8McrT2B0Vv+PS/SGBg2llOrCORMH8a0Tcnjig92s33+oz+//yPKdvLe1mJ+dP4HTx2b2+f37mgYNpZTqxr0XTSA7KYafLNtIQ1Nrn9yzudXNMx/v4Y+r8rjixGFcc3Jun9y3v2nQUEqpbiTHRvHbb09lT1kdD72zvVf3anUbXlt/gLMf+ZD739zGaWMz+cXFk4Kafd4TmtynlFJ+OHl0Bt/7Ri7P/WcfZ0/I5uTRRyfddcUYwzubi3hk+U52ldQyfnAyz1w9k3njs0ImYIAGDaWU8tud88fx0c5SfvrKRn6/aDpjs5K6nelkjOGDHaU8/N4Othys5rjMBB6/8gQWTBpEREToBAsvXXtKKaV6YP3+Q1z+589pavVMwc1KimFMdiJjspIYk53I2OwkxmQlkhofzX/yynj4vR2s21/JsPQ4bp03loXTh+KwYbDQBQuVUqqflFS72Hywip3FtewqriWvpIZdJbXU+wySp8RFUdXQzOCUWG6eO4Zvz8whyqYJe6ALFiqlVL/JSo5lbnIsc8dlt5W53YaDVQ3sKqklr7iW3aWecYvLTxxGbJQjiLXtW70KGiKSCjwDTAIMcC2wA/gbkAvsAy4zxhwSz0jP74HzgHrge8aYddZ9FgM/s257vzFmqVU+A3gOiAPeAm4x4dY0UkqFhYgIISctnpy0eM48PivY1ek3vW0r/R54xxgzDpgKbAPuAt43xowB3reeAywAxlg/NwBPAohIOnAvMBuYBdwrImnWNU9a53qvm9/L+iqllOqFgIOGiCQDpwHPAhhjmowxlcDFwFLrtKXAQuvxxcDzxuNzIFVEBgPnAsuNMRXGmEPAcmC+dSzZGPOZ1bp43udeSimlgqA3LY1RQCnwFxFZLyLPiEgCkG2MKQSwfnvbaUOBfJ/rC6yyrsoLOihXSikVJL0JGpHACcCTxpjpQB2Hu6I60tEcMxNA+dE3FrlBRNaIyJrS0tKua62UUipgvQkaBUCBMeYL6/mreIJIsdW1hPW7xOf8YT7X5wAHuynP6aD8KMaYp4wxM40xMzMz7b/gl1JKhaqAg4YxpgjIF5HjraJ5wFbgDWCxVbYYeN16/AZwtXjMAaqs7qt3gXNEJM0aAD8HeNc6ViMic6yZV1f73EsppVQQ9DZP42bgBRGJBvYA1+AJRMtE5DpgP/Bt69y38Ey3zcMz5fYaAGNMhYj8ElhtnfcLY0yF9fhGDk+5fdv6UUopFSSaEa6UUurYXUZEREqBrwO8PAMo68PqDBSt98AL1bprvQdWKNV7hDGm20HhsAsavSEia/yJtHaj9R54oVp3rffACtV6d8W+q2cppZSyHQ0aSiml/KZB40hPBbsCAdJ6D7xQrbvWe2CFar07pWMaSiml/KYtDaWUUn7ToGERkfkiskNE8kSkqzW0bEVE9onIVyKyQURsm6AiIktEpERENvuUpYvIchHZZf1O6+oewdBJve8TkQPWe75BRM4LZh07IiLDRGSViGwTkS0icotVbuv3vIt62/o9F5FYEflSRDZa9f65VT5S
RL6w3u+/WYnQIU27pwARcQA7gbPxrHm1GlhkjNka1Ir5QUT2ATONMbaeCy4ipwG1eJbHn2SV/QaoMMY8aAXqNGPMncGsZ3ud1Ps+oNYY83Aw69YVa923wcaYdSKSBKzFs7XA97Dxe95FvS/Dxu+5tdRRgjGmVkSigE+AW4DbgX8YY14WkT8BG40xTwazrr2lLQ2PWUCeMWaPMaYJeBnP/h+qjxhjPgIq2hV3tveKbXRSb9szxhR6d8Y0xtTg2SBtKDZ/z7uot61Z+wTVWk+jrB8DzMWzmCvY8P0OhAYNj8729AgFBnhPRNaKyA3BrkwPdbb3Sij4kYhssrqvbNXF056I5ALTgS8Iofe8Xb3B5u+5iDhEZAOelb2XA7uBSmNMi3VKKH2udEqDhoffe3fY0MnGmBPwbKd7k9WdovrXk8BxwDSgEPhdcKvTORFJBP4O3GqMqQ52ffzVQb1t/54bY1qNMdPwbOMwCxjf0WkDW6u+p0HDo7M9PWzPGHPQ+l0C/BPPf6yhorO9V2zNGFNsfUC4gaex6Xtu9a3/HXjBGPMPq9j273lH9Q6V9xzA2vb6A2AOnm2tvauJh8znSlc0aHisBsZYMx2igSvw7P9hayKSYA0WYm21ew6wueurbKWzvVdszfuha7kEG77n1sDss8A2Y8wjPods/Z53Vm+7v+cikikiqdbjOOAsPOMxq4BLrdNs934HQmdPWawpfP8XcABLjDG/CnKVuiUio/C0LsCzN8qLdq23iLwEnIFn1c9i4F7gNWAZMBxr7xWfvVRsoZN6n4Gnm8QA+4AfeMcJ7EJETgE+Br4C3Fbx/8EzPmDb97yLei/Cxu+5iEzBM9DtwNpTyBjzC+v/0ZeBdGA98B1jTGPwatp7GjSUUkr5TbunlFJK+U2DhlJKKb9p0FBKKeU3DRpKKaX8pkFDKaWU3zRoKKWU8psGDaWUUn7ToKGUUspv/x/GeNO8ZV/9gwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Total units sold in each month, summed over all items. item_id is dropped by name,\n",
    "# so the identifier column is never summed and the result does not rely on column order.\n",
    "monthly_totals = sales_by_item_id.drop(columns='item_id').sum()\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "monthly_totals.plot(ax=ax, legend=True, label='Monthly sum')\n",
    "ax.set(title='Total items sold per month', xlabel='date_block_num', ylabel='sum of item_cnt_day')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Outdated items: 12391\n",
      "unique items in test set: 5100\n",
      "Outdated items in test set: 164\n"
     ]
    }
   ],
   "source": [
    "# Items whose monthly totals add up to 0 over the columns from '27' to the last one.\n",
    "recent_months = sales_by_item_id.loc[:, '27':]\n",
    "no_recent_sales = recent_months.sum(axis=1) == 0\n",
    "outdated_items = sales_by_item_id[no_recent_sales]\n",
    "print('Outdated items:', len(outdated_items))\n",
    "\n",
    "# Reload the test rows from disk, then count how many distinct test items are in that set.\n",
    "test = pd.read_csv(r'C:\\Users\\neversleep\\Desktop\\机器学习课设2\\future\\test.csv')\n",
    "print('unique items in test set:', test['item_id'].nunique())\n",
    "test_item_is_outdated = test['item_id'].isin(outdated_items['item_id'])\n",
    "print('Outdated items in test set:', test.loc[test_item_is_outdated, 'item_id'].nunique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "duplicated lines in sales_train is 6\n"
     ]
    }
   ],
   "source": [
    "# duplicated() marks every row that repeats an earlier row in all columns;\n",
    "# summing the boolean mask counts those rows.\n",
    "duplicate_row_count = int(sales_train.duplicated().sum())\n",
    "print(\"duplicated lines in sales_train is\", duplicate_row_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Not exists in month 27 [36]\n",
      "Not exists in month 28 [36]\n",
      "Not exists in month 29 [36]\n",
      "Not exists in month 30 [36]\n",
      "Not exists in month 31 [36]\n",
      "Not exists in month 32 [36]\n",
      "Not exists in month 33 []\n",
      "Shop is outdated for month 27 [ 0  1  8 11 13 17 23 30 32 40 43]\n",
      "Shop is outdated for month 28 [ 0  1  8 11 13 17 23 30 32 33 40 43 54]\n",
      "Shop is outdated for month 29 [ 0  1  8 11 13 17 23 29 30 32 33 40 43 54]\n",
      "Shop is outdated for month 30 [ 0  1  8 11 13 17 23 29 30 32 33 40 43 54]\n",
      "Shop is outdated for month 31 [ 0  1  8 11 13 17 23 29 30 32 33 40 43 54]\n",
      "Shop is outdated for month 32 [ 0  1  8 11 13 17 23 29 30 32 33 40 43 54]\n",
      "Shop is outdated for month 33 [ 0  1  8 11 13 17 23 27 29 30 32 33 40 43 51 54]\n"
     ]
    }
   ],
   "source": [
    "sales_by_shop_id = sales_train.pivot_table(index=['shop_id'],values=['item_cnt_day'], \n",
    "                                        columns='date_block_num', aggfunc=np.sum, fill_value=0).reset_index()\n",
    "sales_by_shop_id.columns = sales_by_shop_id.columns.droplevel().map(str)\n",
    "sales_by_shop_id = sales_by_shop_id.reset_index(drop=True).rename_axis(None, axis=1)\n",
    "sales_by_shop_id.columns.values[0] = 'shop_id'\n",
    "\n",
    "for i in range(27,34):\n",
    "    print('Not exists in month',i,sales_by_shop_id['shop_id'][sales_by_shop_id.loc[:,'0':str(i)].sum(axis=1)==0].unique())\n",
    "\n",
    "for i in range(27,34):\n",
    "    print('Shop is outdated for month',i,sales_by_shop_id['shop_id'][sales_by_shop_id.loc[:,str(i):].sum(axis=1)==0].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_categories = pd.read_csv(r'C:\\Users\\neversleep\\Desktop\\机器学习课设2\\future\\items.csv')\n",
    "item_categories = item_categories[['item_id','item_category_id']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>date_block_num</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_price</th>\n",
       "      <th>item_cnt_day</th>\n",
       "      <th>item_category_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>02.01.2013</td>\n",
       "      <td>0</td>\n",
       "      <td>59</td>\n",
       "      <td>22154</td>\n",
       "      <td>999.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>03.01.2013</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>2552</td>\n",
       "      <td>899.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>05.01.2013</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>2552</td>\n",
       "      <td>899.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>06.01.2013</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>2554</td>\n",
       "      <td>1709.050049</td>\n",
       "      <td>1.0</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15.01.2013</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>2555</td>\n",
       "      <td>1099.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date  date_block_num  shop_id  item_id   item_price  item_cnt_day  \\\n",
       "0  02.01.2013               0       59    22154   999.000000           1.0   \n",
       "1  03.01.2013               0       25     2552   899.000000           1.0   \n",
       "2  05.01.2013               0       25     2552   899.000000          -1.0   \n",
       "3  06.01.2013               0       25     2554  1709.050049           1.0   \n",
       "4  15.01.2013               0       25     2555  1099.000000           1.0   \n",
       "\n",
       "   item_category_id  \n",
       "0                37  \n",
       "1                58  \n",
       "2                58  \n",
       "3                58  \n",
       "4                56  "
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sales_train_merge_cat = pd.merge(sales_train,item_categories, on = 'item_id', how = 'left')\n",
    "sales_train_merge_cat.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sale volume outliers: [2169.]\n",
      "Sale price outliers: [307980.]\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkwAAAELCAYAAAAr0dmKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEp9JREFUeJzt3X+wpXV9H/D3h91VWbCCCxVNMrNrgGTXITEMcWLaODQFs+u0Y9Pajq3Drk2VDm1Fo3VqZQm7I23TVMMMTCYULA20TBO0dcy0w0ZIJcVlqi4JLlgV1gSnGITlUoxooSx8+8d57s3hsud+9+7ee/bHfb1mzpxzvuc53+/3+fCcw/s+z3P2qdZaAACY7KSjPQEAgGOdwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQMfqxSx8xhlntPXr1y/TVAAAls699977RGvtzKXoa1GBaf369dmzZ89SjAsAsKyq6ltL1ZdDcgAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAx9cB03XXX5brrrpv2sAAAh23qgWnXrl3ZtWvXtIcFADhsDskBAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB2rpz3gD37wg2kPCQBwRKYemFpr0x4SAOCIOCQHANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdKxe7gFmZmayffv2HDhwIA899NBc+4UXXrjcQ8+pqrTWkiSXXHJJ7r333rTW8p73vCdXXnllTjvttHznO9/JlVdemdtuuy3PPPNMHnvssXz4wx/Oxz/+8ezcuTM33XRTDhw4kFWrVuXqq6/OunXr5tZv586d2bp1a6666qp86EMfyic+8Yns3Lkzt9xyS6666qokyc6dO3PVVVfNvW+Sffv25fLLL89rXvOanHzyyfnYxz7Wfc/4PGbH+OxnP5trrrkmZ511Vk4//fS5Oc/MzOTKK69Ma+1F63GoZse5/PLLc+21175knebP40S30tYXloPPEceDVTt27DjkhW+44YYdl1566aIGuP7667N79+48+eSTi5za8ti7d2/279+fJ554Ivfcc0++//3v53vf+15aa9m9e3cef/zxPPXUU3nuueeye/fuPPvss7nnnnvy7W9/OzMzM3niiSfy7LPP5s1vfnOS0frdfffdc32Nv+eRRx7JM888k/vuuy933313nnnmmbn3TfLBD34w+/fvz1NPPZX9+/e/aKyFzM5jdozLLrssSfL000+/aM7XX399vvCFL7xkPQ7V7Dh79+7Ngw8++JJ1mj+PE91KW19YDj5HLJedO3c+umPHjhuWoq9lPSQ3MzOT22+/fTmHOCJPP/30i54fOHDgoM/nL3f77bdnZmYmMzMz2bVrV1prc8uMv6e1lttvv31umV27dmVmZmbifPbt25eHH374oGMtZHweu3btyq233jq3
R228n3379mXXrl2L6nvSOA8//PBL1mn+PBbT9/Fopa0vLAefI44XyxqYbr755peEkBPBc889l1tuuSU333xzXnjhhe6yzz33XJLk+eefzy233DJx2auvvnriWAsZn8fzzz+fG2+88aD9XH311XNzOdS+J40za3yd5s9jMX0fj1ba+sJy8DnieNENTFV1aVXtqao9+/fvX1Tnd95550v2dJwIWmu54447cuedd3YDYWttrgYHDhzIHXfcMXHZ+XuXxsdayPg8Js1nfK/QYvqeNM6s8XWaP4/F9H08WmnrC8vB54jjRTcwtdZuaK1d0Fq74Mwzz1xU5xdddFGq6rAnd6yqqlx88cW56KKLsnr1wufNV9VcDVavXp2LL7544rLr16+fONZCxucxaT5VlfXr17/ov8eh9D1pnFnj6zR/Hovp+3i00tYXloPPEceLZT0kt23btm6gOB6tWbMmW7duzbZt23LSSQuXcM2aNVmzZk2SZNWqVdm6devEZbdv3z5xrIWMz2PVqlV573vfe9B+tm/fPjeXQ+170jizxtdp/jwW0/fxaKWtLywHnyOOF8samNatW5ctW7Ys5xBH5NRTT33R84PtPTnYclu2bMm6deuybt26bN68OVU1t8z4e6oqW7ZsmVtm8+bNC/5k9uyzz37JXqbZsRYyPo/NmzfnXe9610v27G3ZsiVnn312Nm/evKi+J40zu7dqfJ3mz+NE/3nwSltfWA4+Rxwvlv0frty2bVs2btyYc845Z7mHmmg8PFxyySXZtGlTNm7cmJ07d2bt2rV53etel5NOOilXXHFFNm7cmA0bNmTt2rW54oorcsopp2THjh3ZtGlTzj333GzcuPFFfwFt27Yt5513Xnbs2JFTTjklH/3oR+fec955583tiZp93LN9+/asXbs2GzZsyKZNmw75r635Y3zgAx9Ikpx11lkvmvO2bdvm1v9w/pKbHWf79u0HXafFrOuJYKWtLywHnyOOB7WYk7IvuOCCtmfPniMacPYfrLzrrruOqB8AgIVU1b2ttQuWoi+XRgEA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6Vk97wKqa9pAAAEdk6oFp7dq10x4SAOCIOCQHANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQsXraA27evHnaQwIAHJGpB6b3ve990x4SAOCIOCQHANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAEBHtdYOfeGq/Um+tQTjnpHkiSXoh4Wp83So83So83So83So83T8WGvtlUvR0erFLNxaO3MpBq2qPa21C5aiLyZT5+lQ5+lQ5+lQ5+lQ5+moqj1L1ZdDcgAAHQITAEDH0QpMNxylcVcadZ4OdZ4OdZ4OdZ4OdZ6OJavzok76BgBYiRySAwDomGpgqqrNVfWNqtpXVR+Z5tgnoqp6uKrur6r7Zn8JUFWvrqo7quqh4f70ob2q6tqh9nur6vyjO/tjV1XdVFWP
V9UDY22LrmtVbRuWf6iqth2NdTmWTajzjqr69rBN31dVbxt77Z8Pdf5GVf3CWLvvlQVU1Y9U1eer6mtV9dWqev/QbpteQgvU2Ta9hKrqFVX1par6ylDnnUP7hqr64rBt/k5VvWxof/nwfN/w+vqxvg5a/4laa1O5JVmV5JtJXp/kZUm+kmTTtMY/EW9JHk5yxry2X0vykeHxR5L86+Hx25LcnqSS/EySLx7t+R+rtyRvSXJ+kgcOt65JXp3kj4f704fHpx/tdTuWbhPqvCPJPz3IspuG74yXJ9kwfJes8r1ySHV+bZLzh8evTPLgUE/b9HTqbJte2jpXklOHx2uSfHHYTm9L8s6h/foklw2P/1GS64fH70zyOwvVf6Gxp7mH6U1J9rXW/ri19v+S/HaSt09x/JXi7UluHh7fnORvjLXf0kb+Z5LTquq1R2OCx7rW2v9I8uS85sXW9ReS3NFae7K19n+S3JFk8/LP/vgxoc6TvD3Jb7fWnm2t/UmSfRl9p/he6WitPdpa+8Ph8feSfC3JD8U2vaQWqPMktunDMGyXTw9P1wy3luTnk3x6aJ+/Pc9u559O8lerqjK5/hNNMzD9UJL/Pfb8kSy8MdHXknyuqu6tqkuHtte01h5NRh/gJH9xaFf/I7PYuqr34fsnw6Ggm2YPE0Wdl8RwOOKnMvqr3Da9TObVObFNL6mqWlVV9yV5PKPg/s0kT7XWDgyLjNdsrp7D699Nsi6HUedpBqY6SJuf6B2Zv9RaOz/JliT/uKressCy6r88JtVVvQ/Pbyb50SRvTPJokk8M7ep8hKrq1CT/OckHWmt/ttCiB2lT60N0kDrbppdYa+351tobk/xwRnuFNh5sseF+yeo8zcD0SJIfGXv+w0n+dIrjn3Baa3863D+e5DMZbTiPzR5qG+4fHxZX/yOz2Lqq92ForT02fBm+kOTG/PkucnU+AlW1JqP/id/aWvsvQ7NteokdrM626eXTWnsqyV0ZncN0WlXNXu5tvGZz9Rxef1VGpwIsus7TDExfTnLOcCb7yzI6+ep3pzj+CaWqTqmqV84+TvLWJA9kVNPZX69sS/LZ4fHvJtk6/ALmZ5J8d3Z3PIdksXX9vSRvrarTh13wbx3aWMC88+p+MaNtOhnV+Z3DL142JDknyZfie6VrOF/j3yX5Wmvt18desk0voUl1tk0vrao6s6pOGx6fnOSijM4X+3ySdwyLzd+eZ7fzdyT572101vek+k825bPb35bRLwe+meSKaY59ot0y+gXFV4bbV2frmdGx2d9P8tBw/+r2578s+I2h9vcnueBor8OxekvynzLadf5cRn+F/IPDqWuSX8roRMJ9Sf7+0V6vY+02oc7/Yajj3uEL7bVjy18x1PkbSbaMtfteWbjOfzmjQw17k9w33N5mm55anW3TS1vnn0jyR0M9H0jyK0P76zMKPPuSfCrJy4f2VwzP9w2vv75X/0k3/9I3AECHf+kbAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmGCFq6p7hvv1VfX3jvZ8xlXVu6vqdYtY/sKq+q/LOSdgZRKYYIVrrf3s8HB9kmMqMCV5d5JDDkwAy0VgghWuqp4eHv5qkp+rqvuq6peHK4L/m6r68nCl9X84LH9hVf1BVd1WVQ9W1a9W1buq6ktVdX9V/egCY72mqj5TVV8Zbj877Nn6WlXdWFVfrarPVdXJVfWOJBckuXWY08kT+txcVV+vqi8k+Ztj7W+qqnuq6o+G+x8b2u+uqjeOLbe7qn7iiAsJnNAEJmDWR5Lc3Vp7Y2vtmowuVfLd1tpPJ/npJO8drrmUJD+Z5P1JzktySZJzW2tvSvLJJO9bYIxrk/xBa+0nk5yf0WV9ktF1nH6jtfaGJE8l+VuttU8n2ZPkXcOc/u/8zqrqFRld0PSvJ/m5JGeNvfz1JG9prf1Ukl9J8i+H9k9mtOcqVXVuRpdQ2HsoBQJWLoEJmOStGV2E9b4kX8zo2mPnDK99
ubX2aGvt2YyuxfS5of3+jA7tTfLzSX4zSdroCu7fHdr/pLV23/D43k4f4358eO9DbXSdp/849tqrknyqqh5Ick2SNwztn0ry14Yry/9Skt86xLGAFWz10Z4AcMyqJO9rrb3oivRVdWGSZ8eaXhh7/kIO73tlvL/nkxz08NsEky6I+bEkn2+t/WJVrU9yV5K01n5QVXckeXuSv5PRYT+ABdnDBMz6XpJXjj3/vSSXDXtiUlXnVtUpRzjG7ye5bOhvVVX9hUXOab6vJ9kwdt7U3x177VVJvj08fve8930yo8ODX26tPXkI8wZWOIEJmLU3yYHhZOxfzihU/K8kfzgc1vq3OfK90u9P8leq6v6MDr29obP8byW5ftJJ3621Z5JcmuS/DSd9f2vs5V9L8q+qaneSVfPed2+SP0vy7w93RYCVpUaH/QFWjuHfdroryY+31l44ytMBjgP2MAErSlVtzegk9iuEJeBQ2cMELLmquiLJ357X/KnW2r84gj4/k2TDvOZ/Nv+kdIDlIDABAHQ4JAcA0CEwAQB0CEwAAB0CEwBAh8AEANDx/wHIp5ZSN61auQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 720x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkMAAAELCAYAAADa2oIHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAECFJREFUeJzt3X2s3XV9B/D3h17EpTotDzNMzS56NZsTQeyMZJs8DBGNCS7RzIyEus10uq3iEv9goRmSlMQ9ZoDLSMeMsJD5tC1zS+TBjbllD2JxhUIMcmE1cxLBQkWbDC397o/zu/W23Ifecnpu2+/rlZyc3/n+Hr6/++n33PPu7/c791ettQAA9OqE1d4BAIDVJAwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6NrWShU899dQ2PT19hHYFAGB87rnnnm+31k5bbrkVhaHp6els27bt8PcKAGBCqurrh7Kc02QAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXROGAICuCUMAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXROGAICuCUMAQNeEIQCga8IQANC1qUl29r73vS+7d+/Oeeedl02bNk2yawCABU00DD366KPZs2dPZmdnJ9ktAMCinCYDALomDAEAXROGAICuCUMAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXROGAICuCUMAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXROGAICuCUMAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXROGAICuCUMAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXROGAICuCUMAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXZtoGHr66af3T99www254YYbJtk9AMCzTE2ys3379u2fnp2dnWTXAAALcpoMAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXplar43vvvTdJcv7556/WLhwx69aty1NPPZVnnnkmSXL22Wdn+/bt2bhxY2699dZcd911mZmZSZLMzs7miiuuyGWXXZatW7cmSa6++upccMEFy/aza9euXHPNNfngBz+Y66+/PldffXVOOeWUQ97PufVXuh4AjNNqfx45MnQEPPnkk/uDUJJs3749SbJ169bs2bMnW7Zs2T9vy5Yt2bNnz/4glCTXXnvtIfVz8803Z8eOHdmyZUt27NiRW265ZUX7Obf+StcDgHFa7c+jVQlDc0eFerVz587Mzs5mdnY2O3fufNb8vXv35q677lpyG7t27cptt92W1lp27tyZ1lpuu+227Nq165D2Yf76K1kPAMbpaPg8cmRolWzZsuWAI0QHW+7o0M0335x9+/Yd0PbMM88ccqqev/5K1gOAcToaPo+WDUNVtbGqtlXVtscff3wS+9SFnTt3LnhUaM7evXuXXP8LX/jCs5bZu3dv7rzzzkPqf/76K1kPAMbpaPg8WjYMtda2ttbWt9bWn3baaZPYpy5MT09nenp60flTU0tf237RRRc9a5mpqam85S1vOaT+56+/kvUAYJyOhs8jp8lWyebNm7N58+ZF51911VVLrr9hw4accMKB/3xr1qzJ5Zdffkj9z19/JesBwDgdDZ9HqxKGzjrrrNXo9qgxPT2dmZmZzMzMLHh0aGpqatmv1p9yyim55JJLUlWZ
np5OVeWSSy455K8kzl9/JesBwDgdDZ9HjgwdAevWrcuaNWv2vz777LOTJBs3bszatWsPOCK0efPmrF27Nhs3btzfttxRoTkbNmzImWeemc2bN+fMM89ccZqeW99RIQBW02p/HlVr7ZAXXr9+fdu2bdthd3bhhRdm3759BxwZuu666w57ewAAi6mqe1pr65dbzpEhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdm5pkZyeccEL27duXJJmZmZlk1wAAC5poGDrppJOyd+/eJMmmTZsm2TUAwIKcJgMAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAujY1yc5OP/307N69OzMzM5PsFgBgURMNQzfddNMkuwMAWJbTZABA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADoWrXWDn3hqseTfP059nlqkm8/x23wQ+o5fmo6Xuo5fmo6Xuo5fkdLTX+itXbacgutKAyNQ1Vta62tn2inxzH1HD81HS/1HD81HS/1HL9jraZOkwEAXROGAICurUYY2roKfR7P1HP81HS81HP81HS81HP8jqmaTvyaIQCAo4nTZABA1yYWhqrqkqp6sKpmq+rKSfV7rKiqnVW1o6q2V9W2oe3kqrqzqh4antcN7VVV1w+1vK+qzpm3nQ3D8g9V1YZ57W8Ytj87rFuT/ymPrKr6eFU9VlX3z2s74jVcrI9j3SL1/EhV/e8wTrdX1dvnzfudoTYPVtVb57Uv+N6vqjOq6ktD3T5VVc8b2k8aXs8O86cn8xMfeVX18qq6q6q+WlUPVNUVQ7txehiWqKdxepiq6vlVdXdV3TvU9JqhfcV1GFetJ6K1dsQfSdYkeTjJK5I8L8m9SV4zib6PlUeSnUlOPajt95NcOUxfmeT3hum3J/l8kkrypiRfGtpPTvLI8LxumF43zLs7ybnDOp9P8rbV/pmPQA3fnOScJPdPsoaL9XGsPxap50eSfHiBZV8zvK9PSnLG8H5fs9R7P8mnk7xnmL4xyQeG6d9IcuMw/Z4kn1rtWoyxpqcnOWeYfmGSrw21M07HW0/j9PBrWkleMEyfmORLw9hbUR3GWetJPCZ1ZOiNSWZba4+01r6f5JNJLp1Q38eyS5PcPEzfnOSd89pvaSP/meTFVXV6krcmubO19kRr7ckkdya5ZJj3o621/2ijUXbLvG0dN1pr/5LkiYOaJ1HDxfo4pi1Sz8VcmuSTrbWnW2v/nWQ2o/f9gu/94WjFhUk+O6x/8L/NXD0/m+QX5o5uHOtaa4+21r4yTH83yVeTvDTG6WFZop6LMU6XMYy17w0vTxweLSuvwzhrfcRNKgy9NMn/zHv9jSw9YHvUktxRVfdU1cah7SWttUeT0Zs+yY8N7YvVc6n2byzQ3oNJ1HCxPo5XvzWcsvn4vFMtK63nKUl2t9b2HtR+wLaG+d8Zlj+uDKcTXp/R/7yN0+fooHomxulhq6o1VbU9
yWMZBe2Hs/I6jLPWR9ykwtBCadnX2A70s621c5K8LclvVtWbl1h2sXqutL1nanh4/izJK5OcneTRJH80tI+znsd9ravqBUn+OsmHWmtPLbXoAm3G6UEWqKdx+hy01p5prZ2d5GUZHcn5qYUWG57HVdNVreekwtA3krx83uuXJfnmhPo+JrTWvjk8P5bkbzMagN8aDntneH5sWHyxei7V/rIF2nswiRou1sdxp7X2reEX5b4kf57ROE1WXs9vZ3TKZ+qg9gO2Ncx/UQ79dN1Rr6pOzOiD+9bW2t8MzcbpYVqonsbpeLTWdif554yuGVppHcZZ6yNuUmHoy0leNVwp/ryMLrL63IT6PupV1dqqeuHcdJKLk9yfUY3mviWyIcnfDdOfS3J5jbwpyXeGw963J7m4qtYNh4UvTnL7MO+7VfWm4bzs5fO2dbybRA0X6+O4M/dhOvjFjMZpMqrBe4ZvlpyR5FUZXci74Ht/uJ7lriTvGtY/+N9mrp7vSvJPw/LHvGHs/EWSr7bW/njeLOP0MCxWT+P08FXVaVX14mH6R5JclNG1WCutwzhrfeSN60rs5R4ZfSviaxmde7xqUv0eC4+Mrqq/d3g8MFefjM6h/mOSh4bnk4f2SvKnQy13JFk/b1u/mtGFarNJfmVe+/qMfiE8nORjGf7g5vH0SPJXGR0S/0FG//v4tUnUcLE+jvXHIvX8y6Fe92X0y+70ectfNdTmwcz7tuJi7/1h3N891PkzSU4a2p8/vJ4d5r9itWsxxpr+XEaH/u9Lsn14vN04HXs9jdPDr+nrkvzXULv7k/zu4dZhXLWexMNfoAYAuuYvUAMAXROGAICuCUMAQNeEIQCga8IQANA1YQgA6JowBCypqv59eJ6uql9e7f1Jkqr68ar67PJLAizP3xkCDklVnZ/kw621d6zyfky1H97MEeA5c2QIWFJVfW+Y/GiSn6+q7VX128Odrf+gqr483B3814flz6+qL1bVp6vqa1X10aq6rKrurqodVfXKJfr6RFXdWFX/Oqz7jqH9vVX1mar6+yR3DEep7h/mramqPxy2fV9VbRra3zDsxz1VdftBt2gA2G9q+UUAkiRXZt6RoaramNG9sn6mqk5K8m9Vdcew7FkZ3en6iSSPJLmptfbGqroiyaYkH1qin+kk52V01/G7qmpmaD83yetaa09U1fS85TcmOSPJ61tre6vq5OHmnTckubS19nhV/VKSazO6hQXAAYQh4HBdnOR1VTV3Y8UXZXQzxu8n+XIb3TQ0VfVwkrmQtCPJBcts99NtdLfxh6rqkSQ/ObTf2Vpb6K7gFyW5ce7U2RCWXpvktUnuHN3LM2syus8awLMIQ8DhqiSbWmu3H9A4urbo6XlN++a93pflf+8cfCHj3Os9S+zHwetUkgdaa+cu0xeAa4aAQ/bdJC+c9/r2JB8YTkmlql5dVWvH0M+7q+qE4dqiV2R0x+ul3JHk/VU1NezHycM6p1XVuUPbiVX102PYN+A45MgQcKjuS7K3qu5N8okk12V0fc9XanQu6vEk7xxDPw8m+WKSlyR5f2vt/4ZTXYu5Kcmrk9xXVT9I8uettY8Np++ur6oXZfS77k+SPDCG/QOOM75aDxw1quoTSf6hteZvCAET4zQZANA1p8mAiauqq5K8+6Dmz7TW3rsKuwN0zmkyAKBrTpMBAF0ThgCArglDAEDXhCEAoGvCEADQtf8HxvRGWhkAe7gAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 720x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(10,4))\n",
    "plt.xlim(-100,3000)\n",
    "sns.boxplot(x = sales_train['item_cnt_day'])\n",
    "print('Sale volume outliers:',sales_train['item_cnt_day'][sales_train['item_cnt_day']>1001].unique())\n",
    "plt.figure(figsize=(10,4))\n",
    "plt.xlim(-10000,320000)\n",
    "sns.boxplot(x = sales_train['item_price'])\n",
    "print('Sale price outliers:',sales_train['item_price'][sales_train['item_price']>300000].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x15498522080>"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkwAAAELCAYAAAAr0dmKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEnxJREFUeJzt3XGsneV9H/DvD9tJMGSBGAZJW8lOgdaOaFNEo6ZbI9ZBakebsm7ZlA1hZ13CxDZImixaFkyxFbZ1XVIkUFUGGSve0FqSLUq1CTfQhY4YLYlpiSFLAk5LNFIC5jJSSAbD8OyP897bw8XnPr729bHx/Xyko/Oe57zneZ73x8vhy/u+577VWgsAAJOdcLQnAABwrBOYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoWLmYlU877bS2du3aIzQVAIClc++99z7RWjt9KfpaVGBau3Ztdu/evRTjAgAcUVX17aXqyyk5AIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6Jh6YLr++utz/fXXT3tYAIBDNvXAtHPnzuzcuXPawwIAHDKn5AAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCAjpXTHvAHP/jBtIcEADgsUw9MrbVpDwkAcFickgMA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6Vh7pAWZmZrJ169bs378/Dz300Fz7BRdccKSHnlNVaa0lSS655JLce++9aa3lfe97X6666qqccsop+e53v5urrroqt912W5599tk89thj+chHPpJPfOIT2b59e26++ebs378/K1asyDXXXJM1a9bMbd/27duzefPmXH311fnwhz+cT37yk9m+fXt27NiRq6++Okmyffv2XH311XOfm2Tv3r254oorcsYZZ+TEE0/Mxz/+8e5nxucxO8bnPve5XHvttTnzzDNz6qmnzs15ZmYmV111VVprL9mOgzU7zhVXXJHrrrvuZds0fx7Hu+W2vQDL1Ypt27Yd9Mo33njjtksvvXRRA9xwww3ZtWtXnnzyyUVO7cjYs2dP9u3blyeeeCL33HNPvv/97+fpp59Oay27du3K448/nqeeeirPP/98du3aleeeey733HNPvvOd72RmZiZPPPFEnnvuubztbW9LMtq+u+++e66v8c888sgjefbZZ3Pffffl7rvvzrPPPjv3uUk+9KEPZd++fXnqqaeyb9++l4y1kNl5zI5x2WWXJUmeeeaZl8z5hhtuyBe/+MWXbcfBmh1nz549efDBB1+2TfPncbxbbtsL8Eqyffv2R7dt23bjUvR1RE/JzczM5Pbbbz+SQxyWZ5555iWv9+/ff8DX89e7/fbbMzMzk5mZmezcuTOttbl1xj/TWsvtt98+t87OnTszMzMzcT579+7Nww8/fMCxFjI+j507d+bWW2+dO6I23s/evXuz
c+fORfU9aZyHH374Zds0fx6L6fuVaLltL8BydkQD0y233PKyEHI8eP7557Njx47ccsstefHFF7vrPv/880mSF154ITt27Ji47jXXXDNxrIWMz+OFF17ITTfddMB+rrnmmrm5HGzfk8aZNb5N8+exmL5fiZbb9gIsZ93AVFWXVtXuqtq9b9++RXV+5513vuxIx/GgtZY77rgjd955ZzcQttbmarB///7ccccdE9edf3RpfKyFjM9j0nzGjwotpu9J48wa36b581hM369Ey217AZazbmBqrd3YWju/tXb+6aefvqjOL7zwwlTVIU/uWFVVueiii3LhhRdm5cqFr5uvqrkarFy5MhdddNHEddeuXTtxrIWMz2PSfKoqa9eufck/j4Ppe9I4s8a3af48FtP3K9Fy216A5eyInpLbsmVLN1C8Eq1atSqbN2/Oli1bcsIJC5dw1apVWbVqVZJkxYoV2bx588R1t27dOnGshYzPY8WKFXn/+99/wH62bt06N5eD7XvSOLPGt2n+PBbT9yvRcttegOXsiAamNWvWZNOmTUdyiMNy8sknv+T1gY6eHGi9TZs2Zc2aNVmzZk02btyYqppbZ/wzVZVNmzbNrbNx48YFf3p+1llnvewo0+xYCxmfx8aNG3PxxRe/7Mjepk2bctZZZ2Xjxo2L6nvSOLNHq8a3af48jvef2S+37QVYzo74H67csmVL1q9fn7PPPvtIDzXReHi45JJLsmHDhqxfvz7bt2/P6tWr88Y3vjEnnHBCrrzyyqxfvz7r1q3L6tWrc+WVV+akk07Ktm3bsmHDhpxzzjlZv379S44kbNmyJeeee262bduWk046KR/72MfmPnPuuefOHYmaXe7ZunVrVq9enXXr1mXDhg0HfdRi/hgf/OAHkyRnnnnmS+a8ZcuWue0/lCMis+Ns3br1gNu0mG09Hiy37QVYrmoxF2Wff/75bffu3Yc14OwfrLzrrrsOqx8AgIVU1b2ttfOXoi+3RgEA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6Vk57wKqa9pAAAIdl6oFp9erV0x4SAOCwOCUHANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQITABAHQITAAAHQITAECHwAQA0CEwAQB0CEwAAB0CEwBAh8AEANAhMAEAdAhMAAAdAhMAQIfABADQsXLaA27cuHHaQwIAHJapB6bLL7982kMCABwWp+QAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOgQmAAAOgQmAIAOgQkAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYAAA6BCYAgA6BCQCgQ2ACAOio1trBr1y1L8m3l2Dc05I8sQT9sDB1ng51ng51ng51ng51no4fa629dik6WrmYlVtrpy/FoFW1u7V2/lL0xWTqPB3qPB3qPB3qPB3qPB1VtXup+nJKDgCgQ2ACAOg4WoHpxqM07nKjztOhztOhztOhztOhztOxZHVe1EXfAADLkVNyAAAdUw1MVbWxqr5ZVXur6qPTHPt4VFUPV9X9VXXf7C8Bqur1VXVHVT00PJ86tFdVXTfUfk9VnXd0Z3/sqqqbq+rxqnpgrG3Rda2qLcP6D1XVlqOxLceyCXXeVlXfGfbp+6rq
nWPv/fOhzt+sql8Ya/e9soCq+pGq+kJVfb2qvlZVHxja7dNLaIE626eXUFW9pqq+XFVfHeq8fWhfV1VfGvbN36mqVw3trx5e7x3eXzvW1wHrP1FrbSqPJCuSfCvJm5K8KslXk2yY1vjH4yPJw0lOm9f2a0k+Oix/NMm/HpbfmeT2JJXkZ5J86WjP/1h9JHl7kvOSPHCodU3y+iR/PDyfOiyferS37Vh6TKjztiT/9ADrbhi+M16dZN3wXbLC98pB1fkNSc4bll+b5MGhnvbp6dTZPr20da4kJw/Lq5J8adhPb0vynqH9hiSXDcv/KMkNw/J7kvzOQvVfaOxpHmF6a5K9rbU/bq39vyS/neRdUxx/uXhXkluG5VuS/I2x9h1t5H8mOaWq3nA0Jnisa639jyRPzmtebF1/IckdrbUnW2v/J8kdSTYe+dm/ckyo8yTvSvLbrbXnWmt/kmRvRt8pvlc6WmuPttb+cFh+OsnXk/xQ7NNLaoE6T2KfPgTDfvnM8HLV8GhJfj7JZ4b2+fvz7H7+mSR/taoqk+s/0TQD0w8l+d9jrx/JwjsTfS3J56vq3qq6dGg7o7X2aDL6FzjJXxza1f/wLLau6n3o/slwKujm2dNEUeclMZyO+KmM/q/cPn2EzKtzYp9eUlW1oqruS/J4RsH9W0meaq3tH1YZr9lcPYf3v5dkTQ6hztMMTHWANj/ROzx/qbV2XpJNSf5xVb19gXXV/8iYVFf1PjS/meRHk7wlyaNJPjm0q/NhqqqTk/znJB9srf3ZQqseoE2tD9IB6myfXmKttRdaa29J8sMZHRVaf6DVhuclq/M0A9MjSX5k7PUPJ/nTKY5/3Gmt/enw/HiSz2a04zw2e6pteH58WF39D89i66reh6C19tjwZfhikpvy54fI1fkwVNWqjP4jfmtr7b8MzfbpJXagOtunj5zW2lNJ7sroGqZTqmr2dm/jNZur5/D+6zK6FGDRdZ5mYPpKkrOHK9lfldHFV787xfGPK1V1UlW9dnY5yTuSPJBRTWd/vbIlyeeG5d9Nsnn4BczPJPne7OF4Dspi6/p7Sd5RVacOh+DfMbSxgHnX1f1iRvt0Mqrze4ZfvKxLcnaSL8f3Stdwvca/S/L11tqvj71ln15Ck+psn15aVXV6VZ0yLJ+Y5MKMrhf7QpJ3D6vN359n9/N3J/nvbXTV96T6Tzblq9vfmdEvB76V5Mppjn28PTL6BcVXh8fXZuuZ0bnZ30/y0PD8+vbnvyz4jaH29yc5/2hvw7H6SPKfMjp0/nxG/xfyDw6lrkl+KaMLCfcm+ftHe7uOtceEOv+HoY57hi+0N4ytf+VQ528m2TTW7ntl4Tr/5YxONexJct/weKd9emp1tk8vbZ1/IskfDfV8IMmvDO1vyijw7E3y6SSvHtpfM7zeO7z/pl79Jz38pW8AgA5/6RsAoENgAgDoEJgAADoEJgCADoEJAKBDYAIA6BCYYJmrqnuG57VV9feO9nzGVdV7q+qNi1j/gqr6r0dyTsDyJDDBMtda+9lhcW2SYyowJXlvkoMOTABHisAEy1xVPTMs/mqSn6uq+6rql4c7gv+bqvrKcKf1fzisf0FV/UFV3VZVD1bVr1bVxVX15aq6v6p+dIGxzqiqz1bVV4fHzw5Htr5eVTdV1deq6vNVdWJVvTvJ+UluHeZ04oQ+N1bVN6rqi0n+5lj7W6vqnqr6o+H5x4b2u6vqLWPr7aqqnzjsQgLHNYEJmPXRJHe31t7SWrs2o1uVfK+19tNJfjrJ+4d7LiXJTyb5QJJzk1yS5JzW2luTfCrJ5QuMcV2SP2it/WSS8zK6rU8yuo/Tb7TW3pzkqSR/q7X2mSS7k1w8zOn/zu+sql6T0Q1N/3qSn0ty5tjb30jy9tbaTyX5lST/cmj/VEZHrlJV52R0C4U9B1MgYPkSmIBJ3pHRTVjvS/KljO49dvbw3ldaa4+21p7L6F5Mnx/a78/o1N4kP5/kN5Okje7g/r2h/U9a
a/cNy/d2+hj348NnH2qj+zz9x7H3Xpfk01X1QJJrk7x5aP90kr823Fn+l5L81kGOBSxjK4/2BIBjViW5vLX2kjvSV9UFSZ4ba3px7PWLObTvlfH+XkhywNNvE0y6IebHk3yhtfaLVbU2yV1J0lr7QVXdkeRdSf5ORqf9ABbkCBMw6+kkrx17/XtJLhuOxKSqzqmqkw5zjN9PctnQ34qq+guLnNN830iybuy6qb879t7rknxnWH7vvM99KqPTg19prT15EPMGljmBCZi1J8n+4WLsX84oVPyvJH84nNb6tzn8o9IfSPJXqur+jE69vbmz/m8luWHSRd+ttWeTXJrkvw0XfX977O1fS/KvqmpXkhXzPndvkj9L8u8PdUOA5aVGp/0Blo/hbzvdleTHW2svHuXpAK8AjjABy0pVbc7oIvYrhSXgYDnCBCy5qroyyd+e1/zp1tq/OIw+P5tk3bzmfzb/onSAI0FgAgDocEoOAKBDYAIA6BCYAAA6BCYAgA6BCQCg4/8DGr1e8LdpQOsAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 720x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkMAAAELCAYAAADa2oIHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEAFJREFUeJzt3XusZVV9B/Dvj7mIzWh1eNRQNb3o1bRWBHFqJG3lUUQ0JthEU1MSxrZmqm1HbOIfNEyKJENinylgU0KpERpSX21T20RgbKlt+hAHOzAQg1zomFqJ4MCITlJ0mNU/zr54Z7iPucOZc5m7Pp/k5Oyz9t5n7fnNOne+s/Y+d1drLQAAvTputQ8AAGA1CUMAQNeEIQCga8IQANA1YQgA6JowBAB0TRgCALomDAEAXROGAICuTa1k45NPPrlNT08fpUMBABifu+6669uttVOW225FYWh6ejo7duw48qMCAJiQqvr64WznNBkA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRtapKdve9978vevXtzzjnnZMuWLZPsGgBgQRMNQw8//HD27duX2dnZSXYLALAop8kAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA14QhAKBrwhAA0DVhCADomjAEAHRNGAIAuiYMAQBdE4YAgK4JQwBA1yYahp588smnl6+77rpcd911k+weAOAZpibZ2YEDB55enp2dnWTXAAALcpoMAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXplar47vvvjtJcu65567WIRw1GzZsyBNPPJGnnnoqSXLmmWdm586d2bx5c2655ZZcc801mZmZSZLMzs7msssuyyWXXJIbbrghSXLllVfmvPPOW7afPXv25KqrrsoHP/jBXHvttbnyyitz0kknHfZxzu2/0v0AYC0xM3QUPP74408HoSTZuXNnkuSGG27Ivn37sm3btqfXbdu2Lfv27Xs6CCXJ1VdffVj93HTTTdm1a1e2bduWXbt25eabb17Rcc7tv9L9AGAtWZUwNDcr1Kvdu3dndnY2s7Oz2b179zPW79+/P3fccceS77Fnz57ceuutaa1l9+7daa3l1ltvzZ49ew7rGObvv5L9AGCtMTO0SrZt23bQDNGhlpsduummm3LgwIGD2p566qnDnuWZv/9K9gOAtWbZMFRVm6tqR1XtePTRRydxTF3YvXv3grNCc/bv37/k/l/4wheesc3+/fuzffv2w+p//v4r2Q8A1pplw1Br7YbW2sbW2sZTTjllEsfUhenp6UxPTy+6fmpq6WvbL7jggmdsMzU1lbe85S2H1f/8/VeyHwCsNU6TrZKtW7dm69ati66/4oorltx/06ZNOe64g//61q1bl0svvfSw+p+//0r2A4C1ZlXC0BlnnLEa3T5nTE9PZ2ZmJjMzMwvODk1NTS371fqTTjopF110Uaoq09PTqapcdNFFh/0V+fn7r2Q/AFhrzAwd
BRs2bMi6deuefn3mmWcmSTZv3pz169cfNCO0devWrF+/Pps3b366bblZoTmbNm3K6aefnq1bt+b0009f8ezO3P5mhQDoWbXWDnvjjRs3th07dhxxZ+eff34OHDhw0MzQNddcc8TvBwCwmKq6q7W2cbntzAwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOja1CQ7O+6443LgwIEkyczMzCS7BgBY0ETD0AknnJD9+/cnSbZs2TLJrgEAFuQ0GQDQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQtalJdnbqqadm7969mZmZmWS3AACLmmgYuvHGGyfZHQDAspwmAwC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIAOiaMAQAdE0YAgC6JgwBAF0ThgCArglDAEDXqrV2+BtXPZrk68+yz5OTfPtZvgc/pJ7jp6bjpZ7jp6bjpZ7j91yp6U+01k5ZbqMVhaFxqKodrbWNE+10DVPP8VPT8VLP8VPT8VLP8TvWauo0GQDQNWEIAOjaaoShG1ahz7VMPcdPTcdLPcdPTcdLPcfvmKrpxK8ZAgB4LnGaDADo2sTCUFVdVFX3V9VsVV0+qX6PFVW1u6p2VdXOqtoxtJ1YVdur6oHhecPQXlV17VDLe6rqrHnvs2nY/oGq2jSv/Q3D+88O+9bk/5RHV1V9vKoeqap757Ud9Rou1sexbpF6fqSq/ncYpzur6u3z1v3OUJv7q+qt89oX/OxX1WlV9aWhbp+qqucN7ScMr2eH9dOT+RMffVX18qq6o6q+WlX3VdVlQ7txegSWqKdxeoSq6vlVdWdV3T3U9KqhfcV1GFetJ6K1dtQfSdYleTDJK5I8L8ndSV4zib6PlUeS3UlOPqTt95NcPixfnuT3huW3J/l8kkrypiRfGtpPTPLQ8LxhWN4wrLszydnDPp9P8rbV/jMfhRq+OclZSe6dZA0X6+NYfyxSz48k+fAC275m+FyfkOS04fO+bqnPfpJPJ3nPsHx9kg8My7+R5Pph+T1JPrXatRhjTU9Nctaw/MIkXxtqZ5yOt57G6ZHXtJK8YFg+PsmXhrG3ojqMs9aTeExqZuiNSWZbaw+11r6f5JNJLp5Q38eyi5PcNCzflOSd89pvbiP/meTFVXVqkrcm2d5ae6y19niS7UkuGtb9aGvtP9polN08773WjNbavyR57JDmSdRwsT6OaYvUczEXJ/lka+3J1tp/J5nN6HO/4Gd/mK04P8lnh/0P/buZq+dnk/zC3OzGsa619nBr7SvD8neTfDXJS2OcHpEl6rkY43QZw1j73vDy+OHRsvI6jLPWR92kwtBLk/zPvNffyNIDtkctye1VdVdVbR7aXtJaezgZfeiT/NjQvlg9l2r/xgLtPZhEDRfrY636reGUzcfnnWpZaT1PSrK3tbb/kPaD3mtY/51h+zVlOJ3w+oz+522cPkuH1DMxTo9YVa2rqp1JHskoaD+YlddhnLU+6iYVhhZKy77GdrCfba2dleRt
SX6zqt68xLaL1XOl7T1TwyPzZ0lemeTMJA8n+aOhfZz1XPO1rqoXJPnrJB9qrT2x1KYLtBmnh1ignsbps9Bae6q1dmaSl2U0k/NTC202PI+rpqtaz0mFoW8kefm81y9L8s0J9X1MaK19c3h+JMnfZjQAvzVMe2d4fmTYfLF6LtX+sgXaezCJGi7Wx5rTWvvW8IPyQJI/z2icJiuv57czOuUzdUj7Qe81rH9RDv903XNeVR2f0T/ct7TW/mZoNk6P0EL1NE7Ho7W2N8k/Z3TN0ErrMM5aH3WTCkNfTvKq4Urx52V0kdXnJtT3c15Vra+qF84tJ7kwyb0Z1WjuWyKbkvzdsPy5JJfWyJuSfGeY9r4tyYVVtWGYFr4wyW3Duu9W1ZuG87KXznuvtW4SNVysjzVn7h/TwS9mNE6TUQ3eM3yz5LQkr8roQt4FP/vD9Sx3JHnXsP+hfzdz9XxXkn8atj/mDWPnL5J8tbX2x/NWGadHYLF6GqdHrqpOqaoXD8s/kuSCjK7FWmkdxlnro29cV2Iv98joWxFfy+jc4xWT6vdYeGR0Vf3dw+O+ufpkdA71H5M8MDyfOLRXkj8darkrycZ57/WrGV2oNpvkV+a1b8zoB8KDST6W4RdurqVHkr/KaEr8Bxn97+PXJlHDxfo41h+L1PMvh3rdk9EPu1PnbX/FUJv7M+/biot99odxf+dQ588kOWFof/7wenZY/4rVrsUYa/pzGU3935Nk5/B4u3E69noap0de09cl+a+hdvcm+d0jrcO4aj2Jh99ADQB0zW+gBgC6JgwBAF0ThgCArglDAEDXhCEAoGvCEADQNWEIWFJV/fvwPF1Vv7zax5MkVfXjVfXZ5bcEWJ7fMwQclqo6N8mHW2vvWOXjmGo/vJkjwLNmZghYUlV9b1j8aJKfr6qdVfXbw52t/6CqvjzcHfzXh+3PraovVtWnq+prVfXRqrqkqu6sql1V9col+vpEVV1fVf867PuOof29VfWZqvr7JLcPs1T3DuvWVdUfDu99T1VtGdrfMBzHXVV12yG3aAB42tTymwAkSS7PvJmhqtqc0b2yfqaqTkjyb1V1+7DtGRnd6fqxJA8lubG19saquizJliQfWqKf6STnZHTX8TuqamZoPzvJ61prj1XV9LztNyc5LcnrW2v7q+rE4ead1yW5uLX2aFX9UpKrM7qFBcBBhCHgSF2Y5HVVNXdjxRdldDPG7yf5chvdNDRV9WCSuZC0K8l5y7zvp9vobuMPVNVDSX5yaN/eWlvoruAXJLl+7tTZEJZem+S1SbaP7uWZdRndZw3gGYQh4EhVki2ttdsOahxdW/TkvKYD814fyPI/dw69kHHu9b4ljuPQfSrJfa21s5fpC8A1Q8Bh+26SF857fVuSDwynpFJVr66q9WPo591VddxwbdErMrrj9VJuT/L+qpoajuPEYZ9Tqursoe34qvrpMRwbsAaZGQIO1z1J9lfV3Uk+keSajK7v+UqNzkU9muSdY+jn/iRfTPKSJO9vrf3fcKprMTcmeXWSe6rqB0n+vLX2seH03bVV9aKMftb9SZL7xnB8wBrjq/XAc0ZVfSLJP7TW/A4hYGKcJgMAuuY0GTBxVXVFkncf0vyZ1tp7V+FwgM45TQYAdM1pMgCga8IQANA1YQgA6JowBAB0TRgCALr2/1ExEFoHwLMeAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 720x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Keep only rows with item_cnt_day < 1001 and item_price < 300000 (outlier cut).\n",
    "within_limits = (sales_train['item_cnt_day'] < 1001) & (sales_train['item_price'] < 300000)\n",
    "sales_train = sales_train[within_limits]\n",
    "\n",
    "# Boxplot of the daily counts on a fixed x-range.\n",
    "plt.figure(figsize=(10, 4))\n",
    "plt.xlim(-100, 3000)\n",
    "sns.boxplot(x=sales_train['item_cnt_day'])\n",
    "\n",
    "# Boxplot of the prices on a fixed x-range.\n",
    "plt.figure(figsize=(10, 4))\n",
    "plt.xlim(-10000, 320000)\n",
    "sns.boxplot(x=sales_train['item_price'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>date_block_num</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_price</th>\n",
       "      <th>item_cnt_day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>484683</th>\n",
       "      <td>15.05.2013</td>\n",
       "      <td>4</td>\n",
       "      <td>32</td>\n",
       "      <td>2973</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              date  date_block_num  shop_id  item_id  item_price  item_cnt_day\n",
       "484683  15.05.2013               4       32     2973        -1.0           1.0"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the rows whose price is negative.\n",
    "sales_train.loc[sales_train['item_price'] < 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1874.0\n"
     ]
    }
   ],
   "source": [
    "# The negative-price row shown above belongs to month block 4, shop 32, item 2973.\n",
    "# Replace negative prices with the median positive price of that same group.\n",
    "same_group = (\n",
    "    (sales_train['date_block_num'] == 4)\n",
    "    & (sales_train['shop_id'] == 32)\n",
    "    & (sales_train['item_id'] == 2973)\n",
    ")\n",
    "positive_price = sales_train['item_price'] > 0\n",
    "median = sales_train[same_group & positive_price].item_price.median()\n",
    "sales_train.loc[sales_train['item_price'] < 0, 'item_price'] = median\n",
    "print(median)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1. Number of good pairs: 111404\n",
      "2. No Data Items: 15246\n",
      "3. Only Item_id Info: 87550\n"
     ]
    }
   ],
   "source": [
    "test = pd.read_csv(r'C:\\Users\\neversleep\\Desktop\\机器学习课设2\\future\\test.csv')\n",
    "\n",
    "# Left-join the test pairs onto the training rows and drop rows containing NaN:\n",
    "# what remains are test rows matched to at least one training row.\n",
    "good_sales = test.merge(sales_train, how='left', on=['item_id', 'shop_id']).dropna()\n",
    "matched_ids = test['ID'].isin(good_sales['ID'])\n",
    "good_pairs = test[matched_ids]\n",
    "\n",
    "# Test rows whose item_id never appears in the training data.\n",
    "item_in_train = test['item_id'].isin(sales_train['item_id'])\n",
    "no_data_items = test[~item_in_train]\n",
    "\n",
    "n_good_pairs = len(good_pairs)\n",
    "n_no_data = len(no_data_items)\n",
    "print('1. Number of good pairs:', n_good_pairs)\n",
    "print('2. No Data Items:', n_no_data)\n",
    "print('3. Only Item_id Info:', len(test) - n_no_data - n_good_pairs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>item_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>45</td>\n",
       "      <td>5</td>\n",
       "      <td>5826</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>64</td>\n",
       "      <td>5</td>\n",
       "      <td>3538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>65</td>\n",
       "      <td>5</td>\n",
       "      <td>3571</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    ID  shop_id  item_id\n",
       "1    1        5     5320\n",
       "4    4        5     5268\n",
       "45  45        5     5826\n",
       "64  64        5     3538\n",
       "65  65        5     3571"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five test rows whose item has no training data.\n",
    "no_data_items.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>shop_name</th>\n",
       "      <th>shop_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>!Якутск Орджоникидзе, 56 фран</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>!Якутск ТЦ \"Центральный\" фран</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Адыгея ТЦ \"Мега\"</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Балашиха ТРК \"Октябрь-Киномир\"</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Волжский ТЦ \"Волга Молл\"</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        shop_name  shop_id\n",
       "0   !Якутск Орджоникидзе, 56 фран        0\n",
       "1   !Якутск ТЦ \"Центральный\" фран        1\n",
       "2                Адыгея ТЦ \"Мега\"        2\n",
       "3  Балашиха ТРК \"Октябрь-Киномир\"        3\n",
       "4        Волжский ТЦ \"Волга Молл\"        4"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the shop lookup table and preview its first five rows.\n",
    "shops = pd.read_csv(\n",
    "    r'C:\\Users\\neversleep\\Desktop\\机器学习课设2\\future\\shops.csv'\n",
    ")\n",
    "shops.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.69647514],\n",
       "       [0.69647514, 1.        ]])"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def _shop_values(shop_id):\n",
    "    # Rows of sales_by_shop_id for one shop, with the first column dropped,\n",
    "    # flattened into a 1-D array.\n",
    "    shop_rows = sales_by_shop_id.loc[sales_by_shop_id['shop_id'] == shop_id]\n",
    "    return np.array(shop_rows.values)[:, 1:].reshape(-1)\n",
    "\n",
    "sales12 = _shop_values(12)\n",
    "sales55 = _shop_values(55)\n",
    "months = np.array(sales_by_shop_id.loc[sales_by_shop_id['shop_id'] == 12].columns[1:])\n",
    "\n",
    "# Correlation matrix between the two shops' series.\n",
    "np.corrcoef(sales12, sales55)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2,  3,  4,  5,  6,  7, 10, 12, 14, 15, 16, 18, 19, 21, 22, 24, 25,\n",
       "       26, 28, 31, 34, 35, 36, 37, 38, 39, 41, 42, 44, 45, 46, 47, 48, 49,\n",
       "       50, 52, 53, 55, 56, 57, 58, 59], dtype=int64)"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sorted array of the distinct shop ids present in the test set.\n",
    "np.sort(test['shop_id'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rewrite four training shop ids to another id (presumably duplicate shops -- TODO confirm).\n",
    "# None of the source ids (0, 1, 11, 40) occurs in the test shop list printed above.\n",
    "shop_id_remap = {0: 57, 1: 58, 11: 10, 40: 39}\n",
    "for old_shop_id, new_shop_id in shop_id_remap.items():\n",
    "    sales_train.loc[sales_train['shop_id'] == old_shop_id, 'shop_id'] = new_shop_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>shop_name</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>shop_city</th>\n",
       "      <th>shop_type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>якутск орджоникидзе  фран</td>\n",
       "      <td>0</td>\n",
       "      <td>якутск</td>\n",
       "      <td>NO_DATA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>якутск тц центральный фран</td>\n",
       "      <td>1</td>\n",
       "      <td>якутск</td>\n",
       "      <td>тц</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>адыгея тц мега</td>\n",
       "      <td>2</td>\n",
       "      <td>адыгея</td>\n",
       "      <td>тц</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>балашиха трк октябрькиномир</td>\n",
       "      <td>3</td>\n",
       "      <td>балашиха</td>\n",
       "      <td>трк</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>волжский тц волга молл</td>\n",
       "      <td>4</td>\n",
       "      <td>волжский</td>\n",
       "      <td>тц</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     shop_name  shop_id shop_city shop_type\n",
       "0    якутск орджоникидзе  фран        0    якутск   NO_DATA\n",
       "1   якутск тц центральный фран        1    якутск        тц\n",
       "2               адыгея тц мега        2    адыгея        тц\n",
       "3  балашиха трк октябрькиномир        3  балашиха       трк\n",
       "4       волжский тц волга молл        4  волжский        тц"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normalise shop names: lower-case, remove punctuation and digits, trim spaces.\n",
    "# regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a literal\n",
    "# string by default, which would leave the names (and the derived city) uncleaned.\n",
    "shops['shop_name'] = (\n",
    "    shops['shop_name']\n",
    "    .str.lower()\n",
    "    .str.replace(r'[^\\w\\s]', '', regex=True)\n",
    "    .str.replace(r'\\d+', '', regex=True)\n",
    "    .str.strip()\n",
    ")\n",
    "\n",
    "# The first word of the cleaned name is taken as the city.\n",
    "shops['shop_city'] = shops['shop_name'].str.partition(' ')[0]\n",
    "\n",
    "# Shop type = first marker found in the name. Order matters: 'мтрц' contains\n",
    "# 'трц', so the longer marker has to be tested first.\n",
    "shop_type_markers = ('мтрц', 'трц', 'трк', 'тц', 'тк')\n",
    "shops['shop_type'] = shops['shop_name'].apply(\n",
    "    lambda name: next((marker for marker in shop_type_markers if marker in name), 'NO_DATA')\n",
    ")\n",
    "shops.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>shop_name</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>shop_city</th>\n",
       "      <th>shop_type</th>\n",
       "      <th>shop_city_code</th>\n",
       "      <th>shop_type_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>якутск орджоникидзе  фран</td>\n",
       "      <td>0</td>\n",
       "      <td>якутск</td>\n",
       "      <td>NO_DATA</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>якутск тц центральный фран</td>\n",
       "      <td>1</td>\n",
       "      <td>якутск</td>\n",
       "      <td>тц</td>\n",
       "      <td>29</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>адыгея тц мега</td>\n",
       "      <td>2</td>\n",
       "      <td>адыгея</td>\n",
       "      <td>тц</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>балашиха трк октябрькиномир</td>\n",
       "      <td>3</td>\n",
       "      <td>балашиха</td>\n",
       "      <td>трк</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>волжский тц волга молл</td>\n",
       "      <td>4</td>\n",
       "      <td>волжский</td>\n",
       "      <td>тц</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     shop_name  shop_id shop_city shop_type  shop_city_code  \\\n",
       "0    якутск орджоникидзе  фран        0    якутск   NO_DATA              29   \n",
       "1   якутск тц центральный фран        1    якутск        тц              29   \n",
       "2               адыгея тц мега        2    адыгея        тц               0   \n",
       "3  балашиха трк октябрькиномир        3  балашиха       трк               1   \n",
       "4       волжский тц волга молл        4  волжский        тц               2   \n",
       "\n",
       "   shop_type_code  \n",
       "0               0  \n",
       "1               5  \n",
       "2               5  \n",
       "3               3  \n",
       "4               5  "
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import preprocessing\n",
    "\n",
    "# Integer-encode the city and type labels, using a fresh encoder per column.\n",
    "for label_col in ('shop_city', 'shop_type'):\n",
    "    shops[label_col + '_code'] = preprocessing.LabelEncoder().fit_transform(shops[label_col])\n",
    "shops.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the item-category lookup table.\n",
    "categories = pd.read_csv(\n",
    "    r'C:\\Users\\neversleep\\Desktop\\机器学习课设2\\future\\item_categories.csv'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_category_name</th>\n",
       "      <th>item_category_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PC - Гарнитуры/Наушники</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Аксессуары - PS2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Аксессуары - PS3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Аксессуары - PS4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Аксессуары - PSP</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        item_category_name  item_category_id\n",
       "0  PC - Гарнитуры/Наушники                 0\n",
       "1         Аксессуары - PS2                 1\n",
       "2         Аксессуары - PS3                 2\n",
       "3         Аксессуары - PS4                 3\n",
       "4         Аксессуары - PSP                 4"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five category rows.\n",
    "categories.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_category_id</th>\n",
       "      <th>type</th>\n",
       "      <th>subtype</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>PC</td>\n",
       "      <td>Гарнитуры/Наушники</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PS2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PS3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PS4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PSP</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   item_category_id        type             subtype\n",
       "0                 0          PC  Гарнитуры/Наушники\n",
       "1                 1  Аксессуары                 PS2\n",
       "2                 2  Аксессуары                 PS3\n",
       "3                 3  Аксессуары                 PS4\n",
       "4                 4  Аксессуары                 PSP"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split each category name on '-': the first piece is the type, the second\n",
    "# piece is the subtype (falling back to the type when there is no '-').\n",
    "name_parts = categories['item_category_name'].str.split('-')\n",
    "categories['type'] = name_parts.map(lambda parts: parts[0].strip())\n",
    "categories['subtype'] = name_parts.map(lambda parts: parts[1 if len(parts) > 1 else 0].strip())\n",
    "\n",
    "# Keep only the id and the two derived columns.\n",
    "# NOTE: this drops 'item_category_name', so the cell cannot be re-run without reloading.\n",
    "categories = categories[['item_category_id', 'type', 'subtype']]\n",
    "categories.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_category_id</th>\n",
       "      <th>type</th>\n",
       "      <th>subtype</th>\n",
       "      <th>cat_type_code</th>\n",
       "      <th>cat_subtype_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>PC</td>\n",
       "      <td>Гарнитуры/Наушники</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PS2</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PS3</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PS4</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Аксессуары</td>\n",
       "      <td>PSP</td>\n",
       "      <td>1</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   item_category_id        type             subtype  cat_type_code  \\\n",
       "0                 0          PC  Гарнитуры/Наушники              0   \n",
       "1                 1  Аксессуары                 PS2              1   \n",
       "2                 2  Аксессуары                 PS3              1   \n",
       "3                 3  Аксессуары                 PS4              1   \n",
       "4                 4  Аксессуары                 PSP              1   \n",
       "\n",
       "   cat_subtype_code  \n",
       "0                29  \n",
       "1                 9  \n",
       "2                10  \n",
       "3                11  \n",
       "4                13  "
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Integer-encode the category type and subtype, using a fresh encoder per column.\n",
    "for label_col, code_col in (('type', 'cat_type_code'), ('subtype', 'cat_subtype_code')):\n",
    "    categories[code_col] = preprocessing.LabelEncoder().fit_transform(categories[label_col])\n",
    "categories.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   date_block_num  shop_id  item_id  item_cnt_month\n",
      "0               0        2       19             0.0\n",
      "1               0        2       27             1.0\n",
      "2               0        2       28             0.0\n",
      "3               0        2       29             0.0\n",
      "4               0        2       32             0.0\n"
     ]
    }
   ],
   "source": [
    "import time  # not needed in this cell; kept because a later cell calls time.time()\n",
    "from itertools import product\n",
    "\n",
    "cols = ['date_block_num', 'shop_id', 'item_id']\n",
    "\n",
    "# Training grid: for each of the 34 month blocks, the cartesian product of the\n",
    "# shops and the items that appear in that month's sales.\n",
    "matrix = []\n",
    "for i in range(34):\n",
    "    sales = sales_train[sales_train.date_block_num == i]\n",
    "    matrix.append(np.array(list(product([i], sales.shop_id.unique(), sales.item_id.unique())), dtype='int16'))\n",
    "\n",
    "matrix = pd.DataFrame(np.vstack(matrix), columns=cols)\n",
    "matrix['date_block_num'] = matrix['date_block_num'].astype(np.int8)\n",
    "matrix['shop_id'] = matrix['shop_id'].astype(np.int8)\n",
    "matrix['item_id'] = matrix['item_id'].astype(np.int16)\n",
    "matrix = matrix.sort_values(cols)\n",
    "\n",
    "sales_train['revenue'] = sales_train['item_price'] * sales_train['item_cnt_day']\n",
    "\n",
    "# Monthly target: daily counts summed per (item, shop, month). Grid rows without\n",
    "# any sale get 0, and the value is clipped to the range [0, 20].\n",
    "groupby = sales_train.groupby(['item_id', 'shop_id', 'date_block_num']).agg({'item_cnt_day': 'sum'})\n",
    "groupby.columns = ['item_cnt_month']\n",
    "groupby = groupby.reset_index()\n",
    "matrix = matrix.merge(groupby, on=['item_id', 'shop_id', 'date_block_num'], how='left')\n",
    "matrix['item_cnt_month'] = matrix['item_cnt_month'].fillna(0).clip(0, 20).astype(np.float16)\n",
    "\n",
    "# Tag the test pairs as month block 34 and give them the same dtypes as the grid.\n",
    "test['date_block_num'] = 34\n",
    "test['date_block_num'] = test['date_block_num'].astype(np.int8)\n",
    "test['shop_id'] = test['shop_id'].astype(np.int8)\n",
    "test['item_id'] = test['item_id'].astype(np.int16)\n",
    "\n",
    "# Append the test rows to the grid. No `keys` argument is passed: with\n",
    "# ignore_index=True keys do not affect the result, and a keys list whose length\n",
    "# differs from the number of frames is deprecated in newer pandas releases.\n",
    "matrix = pd.concat([matrix, test[['item_id', 'shop_id', 'date_block_num']]], ignore_index=True, sort=False)\n",
    "matrix = matrix.fillna(0)  # the appended test rows have no item_cnt_month\n",
    "print(matrix.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "0\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: count missing targets. isnull() is an alias of isna(),\n",
    "# so both printed numbers are the same count.\n",
    "target = matrix['item_cnt_month']\n",
    "print(target.isna().sum())\n",
    "print(target.isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.0520806312561035"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts = time.time()\n",
    "\n",
    "# Attach the item category, the category type/subtype codes and the shop\n",
    "# city/type codes to every grid row.\n",
    "# NOTE(review): item_categories is not created in the nearby cells -- confirm it is\n",
    "# loaded earlier in the notebook so that Restart & Run All works.\n",
    "matrix = (\n",
    "    matrix\n",
    "    .merge(item_categories[['item_id', 'item_category_id']], how='left', on=['item_id'])\n",
    "    .merge(categories[['item_category_id', 'cat_type_code', 'cat_subtype_code']], how='left', on=['item_category_id'])\n",
    "    .merge(shops[['shop_id', 'shop_city_code', 'shop_type_code']], how='left', on=['shop_id'])\n",
    ")\n",
    "\n",
    "# Downcast the merged code columns to int8.\n",
    "int8_cols = ('shop_city_code', 'shop_type_code', 'item_category_id', 'cat_type_code', 'cat_subtype_code')\n",
    "for code_col in int8_cols:\n",
    "    matrix[code_col] = matrix[code_col].astype(np.int8)\n",
    "\n",
    "# Elapsed seconds, shown as the cell output.\n",
    "time.time() - ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date_block_num</th>\n",
       "      <th>shop_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>item_cnt_month</th>\n",
       "      <th>item_category_id</th>\n",
       "      <th>cat_type_code</th>\n",
       "      <th>cat_subtype_code</th>\n",
       "      <th>shop_city_code</th>\n",
       "      <th>shop_type_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>19</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>27</td>\n",
       "      <td>1.0</td>\n",
       "      <td>19</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>28</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30</td>\n",
       "      <td>8</td>\n",
       "      <td>55</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>29</td>\n",
       "      <td>0.0</td>\n",
       "      <td>23</td>\n",
       "      <td>5</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>32</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   date_block_num  shop_id  item_id  item_cnt_month  item_category_id  \\\n",
       "0               0        2       19             0.0                40   \n",
       "1               0        2       27             1.0                19   \n",
       "2               0        2       28             0.0                30   \n",
       "3               0        2       29             0.0                23   \n",
       "4               0        2       32             0.0                40   \n",
       "\n",
       "   cat_type_code  cat_subtype_code  shop_city_code  shop_type_code  \n",
       "0             11                 4               0               5  \n",
       "1              5                10               0               5  \n",
       "2              8                55               0               5  \n",
       "3              5                16               0               5  \n",
       "4             11                 4               0               5  "
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "matrix.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 11056277 entries, 0 to 11056276\n",
      "Data columns (total 9 columns):\n",
      " #   Column            Dtype  \n",
      "---  ------            -----  \n",
      " 0   date_block_num    int8   \n",
      " 1   shop_id           int8   \n",
      " 2   item_id           int16  \n",
      " 3   item_cnt_month    float16\n",
      " 4   item_category_id  int8   \n",
      " 5   cat_type_code     int8   \n",
      " 6   cat_subtype_code  int8   \n",
      " 7   shop_city_code    int8   \n",
      " 8   shop_type_code    int8   \n",
      "dtypes: float16(1), int16(1), int8(7)\n",
      "memory usage: 200.3 MB\n"
     ]
    }
   ],
   "source": [
    "matrix.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lag_feature(df, lags, col):\n",
    "    \"\"\"Append lagged copies of ``col`` to ``df`` as ``{col}_lag_{n}`` columns.\n",
    "\n",
    "    For every ``n`` in ``lags`` the value that ``col`` had for the same\n",
    "    (shop_id, item_id) pair ``n`` date blocks earlier is left-joined onto\n",
    "    ``df``; rows without a matching earlier block get NaN.\n",
    "\n",
    "    A lag column that already exists in ``df`` is replaced, so re-running a\n",
    "    cell does not make ``pd.merge`` emit ``_x``/``_y`` suffixed duplicates.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : DataFrame with 'date_block_num', 'shop_id', 'item_id' and ``col``.\n",
    "    lags : iterable of int offsets, measured in date blocks.\n",
    "    col : name of the column to lag.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    DataFrame with one extra column per lag; the input frame is not mutated.\n",
    "    \"\"\"\n",
    "    keys = ['date_block_num', 'shop_id', 'item_id']\n",
    "    base = df[keys + [col]]\n",
    "    for lag in lags:\n",
    "        lag_col = col + '_lag_' + str(lag)\n",
    "        shifted = base.copy()\n",
    "        shifted.columns = keys + [lag_col]\n",
    "        # Moving the block number forward lines block t up with block t + lag.\n",
    "        shifted['date_block_num'] += lag\n",
    "        if lag_col in df.columns:\n",
    "            # Left over from an earlier run with the same arguments: replace it.\n",
    "            df = df.drop(columns=[lag_col])\n",
    "        df = pd.merge(df, shifted, on=keys, how='left')\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "45.14803719520569"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Add item_cnt_month lagged by 1, 2, 3, 6 and 12 date blocks; the cell value is the elapsed seconds.\n",
    "ts = time.time()\n",
    "matrix = lag_feature(df=matrix, lags=[1, 2, 3, 6, 12], col='item_cnt_month')\n",
    "time.time() - ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts = time.time()\n",
    "# Mean of item_cnt_month over every matrix row of a date block.\n",
    "group = matrix.groupby(['date_block_num']).agg({'item_cnt_month': ['mean']})\n",
    "group.columns = ['date_avg_item_cnt']\n",
    "group = group.reset_index()\n",
    "\n",
    "matrix = matrix.merge(group, on=['date_block_num'], how='left')\n",
    "matrix['date_avg_item_cnt'] = matrix['date_avg_item_cnt'].astype(np.float16)\n",
    "matrix = lag_feature(matrix, [1, 2, 3, 6, 12], 'date_avg_item_cnt')\n",
    "# Keep only the lagged columns; the unlagged mean is built from this block's own item_cnt_month.\n",
    "matrix = matrix.drop(columns=['date_avg_item_cnt'])\n",
    "time.time() - ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts = time.time()\n",
    "# This cell repeats the date_avg_item_cnt cell above verbatim. Run after it, the\n",
    "# original code made pd.merge collide on the existing lag columns and emit\n",
    "# '_x'/'_y' suffixed duplicates, so only lags that are still missing are built.\n",
    "missing_lags = [\n",
    "    lag for lag in [1, 2, 3, 6, 12]\n",
    "    if 'date_avg_item_cnt_lag_' + str(lag) not in matrix.columns\n",
    "]\n",
    "if missing_lags:\n",
    "    group = matrix.groupby(['date_block_num']).agg({'item_cnt_month': ['mean']})\n",
    "    group.columns = [ 'date_avg_item_cnt' ]\n",
    "    group.reset_index(inplace=True)\n",
    "\n",
    "    matrix = pd.merge(matrix, group, on=['date_block_num'], how='left')\n",
    "    matrix['date_avg_item_cnt'] = matrix['date_avg_item_cnt'].astype(np.float16)\n",
    "    matrix = lag_feature(matrix, missing_lags, 'date_avg_item_cnt')\n",
    "    matrix.drop(['date_avg_item_cnt'], axis=1, inplace=True)\n",
    "time.time() - ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts = time.time()\n",
    "# Mean of item_cnt_month over every matrix row sharing a date block and item.\n",
    "group = matrix.groupby(['date_block_num', 'item_id']).agg({'item_cnt_month': ['mean']})\n",
    "group.columns = ['date_item_avg_item_cnt']\n",
    "group = group.reset_index()\n",
    "\n",
    "matrix = matrix.merge(group, on=['date_block_num', 'item_id'], how='left')\n",
    "matrix['date_item_avg_item_cnt'] = matrix['date_item_avg_item_cnt'].astype(np.float16)\n",
    "matrix = lag_feature(matrix, [1, 2, 3, 6, 12], 'date_item_avg_item_cnt')\n",
    "# Keep only the lagged columns; the unlagged mean is built from this block's own item_cnt_month.\n",
    "matrix = matrix.drop(columns=['date_item_avg_item_cnt'])\n",
    "time.time() - ts"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
